From: Andrew Moore Date: Fri, 18 Jun 1993 13:00:14 +0000 (+0000) Subject: POSIX ed version 0.6 by Andrew Moore (alm@netcom.com). X-Git-Tag: FreeBSD-release/1.0~16 X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/commitdiff_plain/54a7a3ed8e6c9c8e2a9bba3de42a60fd39ff3489 POSIX ed version 0.6 by Andrew Moore (alm@netcom.com). --- diff --git a/bin/ed/Makefile b/bin/ed/Makefile new file mode 100644 index 0000000000..a48b0d20fd --- /dev/null +++ b/bin/ed/Makefile @@ -0,0 +1,7 @@ +PROG= ed +CFLAGS+=-I${.CURDIR} -DVI_BANG -DDES -DGNU_REGEX -DHAVE_STRING_H=1 +SRCS= ed.c re.c buf.c cbc.c regex.c +LINKS= ${BINDIR}/ed ${BINDIR}/red +MLINKS= ed.1 red.1 + +.include diff --git a/bin/ed/POSIX b/bin/ed/POSIX new file mode 100644 index 0000000000..dd506fb5a6 --- /dev/null +++ b/bin/ed/POSIX @@ -0,0 +1,50 @@ +This version of ed is not strictly POSIX compliant, as described in the +POSIX 1003.2 Draft 11.2 document. BSD commands have been implemented +wherever they do not conflict with the POSIX standard. For backwards +compatibility, the POSIX rule that says a range of addresses cannot be +used where only a single address is expected has been relaxed. + +The BSD commands included are: + 1) `s' (i.e., s[rgp]*) to repeat a previous substitution, + 2) `W' for appending text to an existing file, + 3) `wq' for exiting after a write, and + 4) `z' for scrolling through the buffer. +BSD line addressing syntax (i.e., `^' and `%'). is also recognized. + +The POSIX interactive global commands `G' and `V' are extended to support +multiple commands, including `a', `i' and `c'. The command format is the +same as for the global commands `g' and `v', i.e., one command per line +with each line, except for the last, ending in a backslash (\). + +If crypt is available, files can be read and written using DES encryption. +The `x' command prompts the user to enter a key used for encrypting/ +decrypting subsequent reads and writes. If only a newline is entered as +the key, then encryption is disabled. Otherwise, a key is read in the +same manner as a password entry. The key remains in effect until +encryption is disabled. For more information on the encryption algorithm, +see the bdes(1) man page. Encryption/decryption should be fully compatible +with SunOS DES. + +An extension to the POSIX file commands `E', `e', `r', `W' and `w' is that + arguments are processed for backslash escapes, i.e., any character +preceded by a backslash is interpreted literally. If the first unescaped +character of a argument is a bang (!), then the rest of the line +is interpreted as a shell command, and no escape processing is performed +by ed. + +The vi editor's bang command syntax is supported, i.e., +(addr1,addr2) ! replaces the addressed lines with the output of + the command . +[rwe] !! reads/writes/edits the previous !. + +If ed is invoked with a name argument prefixed by a bang, then the +remainder of the argument is interpreted as a shell command. To invoke +ed on a file whose name starts with bang, prefix the name with a backslash. + +ed runs in restricted mode if invoked as red. This limits editing of +files in the local directory only and prohibits ! commands. + +Though ed is not a binary editor, it can be used (if painfully) to edit +binary files. To assist in binary editing, when a file containing at +least one ASCII NUL character is written, a newline is not appended +if it did not already contain one upon reading. diff --git a/bin/ed/README b/bin/ed/README new file mode 100644 index 0000000000..9dada652ad --- /dev/null +++ b/bin/ed/README @@ -0,0 +1,22 @@ +ed is an 8-bit-clean, POSIX-compliant line editor. It should work with +any regular expression package that conforms to the POSIX interface +standard, such as GNU regex(3). + +If reliable signals are supported (e.g., POSIX sigaction(2)), it should +compile with little trouble. Otherwise, the macros spl1() and spl0() +should be redefined to disable interrupts. + +The following compiler directives are recognized: +GNU_REGEX - use with GNU regex(3) +DES - use to add encryption support (requires crypt(3)) +NO_REALLOC_NULL - use if realloc(3) does not accept a NULL pointer +BACKWARDS - use for backwards compatibility + +The file `POSIX' describes extensions to and deviations from the POSIX +standard. + +The ./test directory contains regression tests for ed. The README +file in that directory explains how to run these. + +For a description of the ed algorithm, see Kernighan and Plauger's book +"Software Tools in Pascal," Addison-Wesley, 1981. diff --git a/bin/ed/buf.c b/bin/ed/buf.c new file mode 100644 index 0000000000..57c336121c --- /dev/null +++ b/bin/ed/buf.c @@ -0,0 +1,246 @@ +/* buf.c: This file contains the scratch-file buffer rountines for the + ed line editor. */ +/*- + * Copyright (c) 1992 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rodney Ruddock of the University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)buf.c 5.5 (Berkeley) 3/28/93"; +#endif /* not lint */ + +#include +#include +#include +#include +#include + +#include "ed.h" + +extern char errmsg[]; +extern line_t line0; + +FILE *sfp; /* scratch file pointer */ +char *sfbuf = NULL; /* scratch file input buffer */ +int sfbufsz = 0; /* scratch file input buffer size */ +off_t sfseek; /* scratch file position */ +int seek_write; /* seek before writing */ + +/* gettxt: get a line of text from the scratch file; return pointer + to the text */ +char * +gettxt(lp) + line_t *lp; +{ + int len, ct; + + if (lp == &line0) + return NULL; + seek_write = 1; /* force seek on write */ + /* out of position */ + if (sfseek != lp->seek) { + sfseek = lp->seek; + if (fseek(sfp, sfseek, SEEK_SET) < 0) { + fprintf(stderr, "%s\n", strerror(errno)); + sprintf(errmsg, "cannot seek temp file"); + return NULL; + } + } + len = lp->len & ~ACTV; + CKBUF(sfbuf, sfbufsz, len + 1, NULL); + if ((ct = fread(sfbuf, sizeof(char), len, sfp)) < 0 || ct != len) { + fprintf(stderr, "%s\n", strerror(errno)); + sprintf(errmsg, "cannot read temp file"); + return NULL; + } + sfseek += len; /* update file position */ + sfbuf[len] = '\0'; + return sfbuf; +} + + +extern long curln; +extern long lastln; + +/* puttxt: write a line of text to the scratch file and add a line node + to the editor buffer; return a pointer to the end of the text */ +char * +puttxt(cs) + char *cs; +{ + line_t *lp; + int len, ct; + char *s; + + if ((lp = (line_t *) malloc(sizeof(line_t))) == NULL) { + fprintf(stderr, "%s\n", strerror(errno)); + sprintf(errmsg, "out of memory"); + return NULL; + } + /* assert: cs is '\n' terminated */ + for (s = cs; *s != '\n'; s++) + ; + if (s - cs >= LINECHARS) { + sprintf(errmsg, "line too long"); + return NULL; + } + len = (s - cs) & ~ACTV; + /* out of position */ + if (seek_write) { + if (fseek(sfp, 0L, SEEK_END) < 0) { + fprintf(stderr, "%s\n", strerror(errno)); + sprintf(errmsg, "cannot seek temp file"); + return NULL; + } + sfseek = ftell(sfp); + seek_write = 0; + } + /* assert: spl1() */ + if ((ct = fwrite(cs, sizeof(char), len, sfp)) < 0 || ct != len) { + sfseek = -1; + fprintf(stderr, "%s\n", strerror(errno)); + sprintf(errmsg, "cannot write temp file"); + return NULL; + } + lp->len = len; + lp->seek = sfseek; + lpqueue(lp); + sfseek += len; /* update file position */ + return ++s; +} + + +/* lpqueue: add a line node in the editor buffer after the current line */ +void +lpqueue(lp) + line_t *lp; +{ + line_t *cp; + + cp = getlp(curln); /* this getlp last! */ + insqueue(lp, cp); + lastln++; + curln++; +} + + +/* getaddr: return line number of pointer */ +long +getaddr(lp) + line_t *lp; +{ + line_t *cp = &line0; + long n = 0; + + while (cp != lp && (cp = cp->next) != &line0) + n++; + return (cp != &line0) ? n : 0; +} + + +/* getlp: return pointer to a line node in the editor buffer */ +line_t * +getlp(n) + long n; +{ + static line_t *lp = &line0; + static long on = 0; + + spl1(); + if (n > on) + if (n <= (on + lastln) >> 1) + for (; on < n; on++) + lp = lp->next; + else { + lp = line0.prev; + for (on = lastln; on > n; on--) + lp = lp->prev; + } + else + if (n >= on >> 1) + for (; on > n; on--) + lp = lp->prev; + else { + lp = &line0; + for (on = 0; on < n; on++) + lp = lp->next; + } + spl0(); + return lp; +} + + +char sfn[15] = ""; /* scratch file name */ + +/* sbopen: open scratch file */ +sbopen() +{ + strcpy(sfn, "/tmp/ed.XXXXXX"); + if (mktemp(sfn) == NULL || (sfp = fopen(sfn, "w+")) == NULL) { + fprintf(stderr, "%s: %s\n", sfn, strerror(errno)); + sprintf(errmsg, "cannot open temp file"); + return ERR; + } + return 0; +} + + +/* sbclose: close scratch file */ +sbclose() +{ + if (sfp) { + if (fclose(sfp) < 0) { + fprintf(stderr, "%s: %s\n", sfn, strerror(errno)); + sprintf(errmsg, "cannot close temp file"); + return ERR; + } + sfp = NULL; + unlink(sfn); + } + sfseek = seek_write = 0; + return 0; +} + + +/* quit: remove scratch file and exit */ +void +quit(n) + int n; +{ + if (sfp) { + fclose(sfp); + unlink(sfn); + } + exit(n); +} diff --git a/bin/ed/cbc.c b/bin/ed/cbc.c new file mode 100644 index 0000000000..95ce63cd29 --- /dev/null +++ b/bin/ed/cbc.c @@ -0,0 +1,435 @@ +/* cbc.c: This file contains the encryption routines for the ed line editor */ +/*- + * Copyright (c) 1991 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Matt Bishop of Dartmouth College. + * + * The United States Government has rights in this work pursuant + * to contract no. NAG 2-680 between the National Aeronautics and + * Space Administration and Dartmouth College. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)cbc.c 5.5 (Berkeley) 6/27/91"; +#endif /* not lint */ + +/* Author: Matt Bishop + * Department of Mathematics and Computer Science + * Dartmouth College + * Hanover, NH 03755 + * Email: Matt.Bishop@dartmouth.edu + * ...!decvax!dartvax!Matt.Bishop + * + * See Technical Report PCS-TR91-158, Department of Mathematics and Computer + * Science, Dartmouth College, for a detailed description of the implemen- + * tation and differences between it and Sun's. The DES is described in + * FIPS PUB 46, and the modes in FIPS PUB 81 (see either the manual page + * or the technical report for a complete reference). + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ed.h" + +/* + * Define a divisor for rand() that yields a uniform distribution in the + * range 0-255. + */ +#define RAND_DIV (((unsigned) RAND_MAX + 1) >> 8) + +/* + * BSD and System V systems offer special library calls that do + * block moves and fills, so if possible we take advantage of them + */ +#define MEMCPY(dest,src,len) memcpy((dest),(src),(len)) +#define MEMZERO(dest,len) memset((dest), 0, (len)) + +/* Hide the calls to the primitive encryption routines. */ +#define DES_KEY(buf) \ + if (des_setkey(buf)) \ + err("des_setkey"); +#define DES_XFORM(buf) \ + if (des_cipher(buf, buf, 0L, (inverse ? -1 : 1))) \ + err("des_cipher"); + +/* + * read/write - no error checking + */ +#define READ(buf, n, fp) fread(buf, sizeof(char), n, fp) +#define WRITE(buf, n, fp) fwrite(buf, sizeof(char), n, fp) + +/* + * some things to make references easier + */ +typedef char Desbuf[8]; +#define CHAR(x,i) (x[i]) +#define UCHAR(x,i) (x[i]) +#define BUFFER(x) (x) +#define UBUFFER(x) (x) + +/* + * global variables and related macros + */ + +enum { /* encrypt, decrypt, authenticate */ + MODE_ENCRYPT, MODE_DECRYPT, MODE_AUTHENTICATE +} mode = MODE_ENCRYPT; + +Desbuf ivec; /* initialization vector */ +Desbuf pvec; /* padding vector */ +char bits[] = { /* used to extract bits from a char */ + '\200', '\100', '\040', '\020', '\010', '\004', '\002', '\001' +}; +int pflag; /* 1 to preserve parity bits */ + +char des_buf[8]; /* shared buffer for desgetc/desputc */ +int des_ct = 0; /* count for desgetc/desputc */ +int des_n = 0; /* index for desputc/desgetc */ + + +/* desinit: initialize DES */ +void +desinit() +{ +#ifdef DES + int i; + + des_ct = des_n = 0; + + /* initialize the initialization vctor */ + MEMZERO(ivec, 8); + + /* intialize the padding vector */ + srand((unsigned) time((time_t *) 0)); + for (i = 0; i < 8; i++) + CHAR(pvec, i) = (char) (rand()/RAND_DIV); +#endif +} + + +/* desgetc: return next char in an encrypted file */ +desgetc(fp) + FILE *fp; +{ +#ifdef DES + if (des_n >= des_ct) { + des_n = 0; + des_ct = cbcdec(des_buf, fp); + } + return (des_ct > 0) ? des_buf[des_n++] : EOF; +#endif +} + + +/* desputc: write a char to an encrypted file; return char written */ +desputc(c, fp) + int c; + FILE *fp; +{ +#ifdef DES + if (des_n == sizeof des_buf) { + des_ct = cbcenc(des_buf, des_n, fp); + des_n = 0; + } + return (des_ct >= 0) ? (des_buf[des_n++] = c) : EOF; +#endif +} + + +/* desflush: flush an encrypted file's output; return status */ +desflush(fp) + FILE *fp; +{ +#ifdef DES + if (des_n == sizeof des_buf) { + des_ct = cbcenc(des_buf, des_n, fp); + des_n = 0; + } + return (des_ct >= 0 && cbcenc(des_buf, des_n, fp) >= 0) ? 0 : EOF; +#endif +} + +#ifdef DES +/* + * get keyword from tty or stdin + */ +getkey() +{ + register char *p; /* used to obtain the key */ + Desbuf msgbuf; /* I/O buffer */ + + /* + * get the key + */ + if (*(p = getpass("Enter key: "))) { + + /* + * copy it, nul-padded, into the key area + */ + cvtkey(BUFFER(msgbuf), p); + MEMZERO(p, _PASSWORD_LEN); + makekey(msgbuf); + MEMZERO(msgbuf, sizeof msgbuf); + return 1; + } + return 0; +} + + +extern char errmsg[]; + +/* + * print a warning message and, possibly, terminate + */ +void +err(s) + char *s; /* the message */ +{ + (void)sprintf(errmsg, "%s", s ? s : strerror(errno)); +} + +/* + * map a hex character to an integer + */ +tobinhex(c, radix) + int c; /* char to be converted */ + int radix; /* base (2 to 16) */ +{ + switch(c) { + case '0': return(0x0); + case '1': return(0x1); + case '2': return(radix > 2 ? 0x2 : -1); + case '3': return(radix > 3 ? 0x3 : -1); + case '4': return(radix > 4 ? 0x4 : -1); + case '5': return(radix > 5 ? 0x5 : -1); + case '6': return(radix > 6 ? 0x6 : -1); + case '7': return(radix > 7 ? 0x7 : -1); + case '8': return(radix > 8 ? 0x8 : -1); + case '9': return(radix > 9 ? 0x9 : -1); + case 'A': case 'a': return(radix > 10 ? 0xa : -1); + case 'B': case 'b': return(radix > 11 ? 0xb : -1); + case 'C': case 'c': return(radix > 12 ? 0xc : -1); + case 'D': case 'd': return(radix > 13 ? 0xd : -1); + case 'E': case 'e': return(radix > 14 ? 0xe : -1); + case 'F': case 'f': return(radix > 15 ? 0xf : -1); + } + /* + * invalid character + */ + return(-1); +} + +/* + * convert the key to a bit pattern + */ +void +cvtkey(obuf, ibuf) + char *obuf; /* bit pattern */ + char *ibuf; /* the key itself */ +{ + register int i, j; /* counter in a for loop */ + int nbuf[64]; /* used for hex/key translation */ + + /* + * leading '0x' or '0X' == hex key + */ + if (ibuf[0] == '0' && (ibuf[1] == 'x' || ibuf[1] == 'X')) { + ibuf = &ibuf[2]; + /* + * now translate it, bombing on any illegal hex digit + */ + for (i = 0; ibuf[i] && i < 16; i++) + if ((nbuf[i] = tobinhex((int) ibuf[i], 16)) == -1) + err("bad hex digit in key"); + while (i < 16) + nbuf[i++] = 0; + for (i = 0; i < 8; i++) + obuf[i] = + ((nbuf[2*i]&0xf)<<4) | (nbuf[2*i+1]&0xf); + /* preserve parity bits */ + pflag = 1; + return; + } + /* + * leading '0b' or '0B' == binary key + */ + if (ibuf[0] == '0' && (ibuf[1] == 'b' || ibuf[1] == 'B')) { + ibuf = &ibuf[2]; + /* + * now translate it, bombing on any illegal binary digit + */ + for (i = 0; ibuf[i] && i < 16; i++) + if ((nbuf[i] = tobinhex((int) ibuf[i], 2)) == -1) + err("bad binary digit in key"); + while (i < 64) + nbuf[i++] = 0; + for (i = 0; i < 8; i++) + for (j = 0; j < 8; j++) + obuf[i] = (obuf[i]<<1)|nbuf[8*i+j]; + /* preserve parity bits */ + pflag = 1; + return; + } + /* + * no special leader -- ASCII + */ + (void)strncpy(obuf, ibuf, 8); +} + +/***************** + * DES FUNCTIONS * + *****************/ +/* + * This sets the DES key and (if you're using the deszip version) + * the direction of the transformation. This uses the Sun + * to map the 64-bit key onto the 56 bits that the key schedule + * generation routines use: the old way, which just uses the user- + * supplied 64 bits as is, and the new way, which resets the parity + * bit to be the same as the low-order bit in each character. The + * new way generates a greater variety of key schedules, since many + * systems set the parity (high) bit of each character to 0, and the + * DES ignores the low order bit of each character. + */ +void +makekey(buf) + Desbuf buf; /* key block */ +{ + register int i, j; /* counter in a for loop */ + register int par; /* parity counter */ + + /* + * if the parity is not preserved, flip it + */ + if (!pflag) { + for (i = 0; i < 8; i++) { + par = 0; + for (j = 1; j < 8; j++) + if ((bits[j]&UCHAR(buf, i)) != 0) + par++; + if ((par&01) == 01) + UCHAR(buf, i) = UCHAR(buf, i)&0177; + else + UCHAR(buf, i) = (UCHAR(buf, i)&0177)|0200; + } + } + + DES_KEY(UBUFFER(buf)); +} + + +/* + * This encrypts using the Cipher Block Chaining mode of DES + */ +cbcenc(msgbuf, n, fp) + char *msgbuf; + int n; + FILE *fp; +{ + int inverse = 0; /* 0 to encrypt, 1 to decrypt */ + + /* + * do the transformation + */ + if (n == 8) { + for (n = 0; n < 8; n++) + CHAR(msgbuf, n) ^= CHAR(ivec, n); + DES_XFORM(UBUFFER(msgbuf)); + MEMCPY(BUFFER(ivec), BUFFER(msgbuf), 8); + return WRITE(BUFFER(msgbuf), 8, fp); + } + /* + * at EOF or last block -- in either case, the last byte contains + * the character representation of the number of bytes in it + */ +/* + MEMZERO(msgbuf + n, 8 - n); +*/ + /* + * Pad the last block randomly + */ + (void)MEMCPY(BUFFER(msgbuf + n), BUFFER(pvec), 8 - n); + CHAR(msgbuf, 7) = n; + for (n = 0; n < 8; n++) + CHAR(msgbuf, n) ^= CHAR(ivec, n); + DES_XFORM(UBUFFER(msgbuf)); + return WRITE(BUFFER(msgbuf), 8, fp); +} + +/* + * This decrypts using the Cipher Block Chaining mode of DES + */ +cbcdec(msgbuf, fp) + char *msgbuf; /* I/O buffer */ + FILE *fp; /* input file descriptor */ +{ + Desbuf ibuf; /* temp buffer for initialization vector */ + register int n; /* number of bytes actually read */ + register int c; /* used to test for EOF */ + int inverse = 1; /* 0 to encrypt, 1 to decrypt */ + + if ((n = READ(BUFFER(msgbuf), 8, fp)) == 8) { + /* + * do the transformation + */ + MEMCPY(BUFFER(ibuf), BUFFER(msgbuf), 8); + DES_XFORM(UBUFFER(msgbuf)); + for (c = 0; c < 8; c++) + UCHAR(msgbuf, c) ^= UCHAR(ivec, c); + MEMCPY(BUFFER(ivec), BUFFER(ibuf), 8); + /* + * if the last one, handle it specially + */ + if ((c = fgetc(fp)) == EOF) { + n = CHAR(msgbuf, 7); + if (n < 0 || n > 7) { + err("decryption failed (block corrupted)"); + return EOF; + } + } else + (void)ungetc(c, fp); + return n; + } + if (n > 0) + err("decryption failed (incomplete block)"); + else if (n < 0) + err("cannot read file"); + return EOF; +} +#endif /* DES */ diff --git a/bin/ed/ed.1 b/bin/ed/ed.1 new file mode 100644 index 0000000000..ff6a8499ff --- /dev/null +++ b/bin/ed/ed.1 @@ -0,0 +1,980 @@ +.TH ED 1 "21 May 1993" +.SH NAME +ed, red \- text editor +.SH SYNOPSIS +ed [-] [-sx] [-p \fIstring\fR] [\fIfile\fR] +.LP +red [-] [-sx] [-p \fIstring\fR] [\fIfile\fR] +.SH DESCRIPTION +.B ed +is a line-oriented text editor. +It is used to create, display, modify and otherwise manipulate text +files. +.B red +is a restricted +.BR ed : +it can only edit files in the current +directory and cannot execute shell commands. + +If invoked with a +.I file +argument, then a copy of +.I file +is read into the editor's buffer. +Changes are made to this copy and not directly to +.I file +itself. +Upon quitting +.BR ed , +any changes not explicitly saved with a +.I `w' +command are lost. + +Editing is done in two distinct modes: +.I command +and +.IR input . +When first invoked, +.B ed +is in command mode. +In this mode commands are read from the standard input and +executed to manipulate the contents of the editor buffer. +A typical command might look like: +.sp +.RS +,s/\fIold\fR/\fInew\fR/g +.RE +.sp +which replaces all occurences of the string +.I old +with +.IR new . + +When an input command, such as +.I `a' +(append), +.I `i' +(insert) or +.I `c' +(change), is given, +.B ed +enters input mode. This is the primary means +of adding text to a file. +In this mode, no commands are available; +instead, the standard input is written +directly to the editor buffer. Lines consist of text up to and +including a +.IR newline +character. +Input mode is terminated by +entering a single period (\fI.\fR) on a line. + +All +.B ed +commands operate on whole lines or ranges of lines; e.g., +the +.I `d' +command deletes lines; the +.I `m' +command moves lines, and so on. +It is possible to modify only a portion of a line by means of replacement, +as in the example above. However even here, the +.I `s' +command is applied to whole lines at a time. + +In general, +.B ed +commands consist of zero or more line addresses, followed by a single +character command and possibly additional parameters; i.e., +commands have the structure: +.sp +.RS +.I [address [,address]]command[parameters] +.RE +.sp +The address(es) indicate the line(s) to be affected by the command. +If fewer addresses are given than the command accepts, then default +addresses are supplied. + +.SS OPTIONS +.TP 8 +-s +Suppresses diagnostics. This should be used if +.BR ed 's +standard input is from a script. + +.TP 8 +-x +Prompts for an encryption key to be used in subsequent reads and writes +(see the +.I `x' +command). + +.TP 8 +.RI \-p \ string +Specifies a command prompt. This may be toggled on and off with the +.I `P' +command. + +.TP 8 +.I file +Specifies the name of a file to read. If +.I file +is prefixed with a +bang (!), then it is interpreted as a shell command. In this case, +what is read is +the standard output of +.I file +executed via +.IR sh (1). +To read a file whose name begins with a bang, prefix the +name with a backslash (\\). +The default filename is set to +.I file +only if it is not prefixed with a bang. + +.SS LINE ADDRESSING +An address represents the number of line in the buffer. +.B ed +maintains a +.I current address +which is +typically supplied to commands as the default address when none is specified. +When a file is first read, the current address is set to the last line +of the file. In general, the current address is set to the last line +affected by a command. + +A line address is +constructed from one of the bases in the list below, optionally followed +by a numeric offset. The offset may include any combination +of digits, operators (i.e., +.IR + , +.I - +and +.IR ^ ) +and whitespace. +Addresses are read from left to right, and their values are computed +relative to the current address. + +One exception to the rule that addresses represent line numbers is the +address +.I 0 +(zero). +This means "before the first line," +and is legal wherever it makes sense. + +An address range is two addresses separated either by a comma or +semi-colon. The value of the first address in a range cannot exceed the +value of the the second. If an +.IR n- tuple +of addresses is given where +.I n > 2, +then the corresponding range is determined by the last two addresses +in the +.IR n- tuple. +If only one address is expected, then the last +address is used. + +Each address in a comma-delimited range is interpreted relative to the +current address. In a semi-colon-delimited range, the first address is +used to set the current address, and the second address is interpreted +relative to the first. + +The following address symbols are recognized. + +.TP 8 +\fR.\fR +The current line (address) in the buffer. + +.TP 8 +$ +The last line in the buffer. + +.TP 8 +n +The +.IR n th, +line in the buffer +where +.I n +is a number in the range +.I [0,$]. + +.TP 8 +- or ^ +The previous line. +This is equivalent to +.I -1 +and may be repeated with cumulative effect. + +.TP 8 +-\fIn\fR or ^\fIn\fR +The +.IR n th +previous line, where +.I n +is a non-negative number. + +.TP 8 ++ +The +next line. +This is equivalent to +.I +1 +and may be repeated with cumulative effect. + +.TP 8 ++\fIn\fR or whitespace\fIn\fR +The +.IR n th +next line, where +.I n +is a non-negative number. +.I whitespace +followed by a number +.I n +is interpreted as +.IR +n . + +.TP 8 +, \fRor\fB % +The first through last lines in the buffer. This is equivalent to +the address range +.I 1,$. + +.TP 8 +; +The +current through last lines in the buffer. This is equivalent to +the address range +.I .,$. + +.TP 8 +.RI / re/ +The +next line containing the regular expression +.IR re . +The search wraps to the beginning of the buffer and continues down to the +current line, if necessary. +// repeats the last search. + +.TP 8 +.RI ? re? +The +previous line containing the regular expression +.IR re . +The search wraps to the end of the buffer and continues up to the +current line, if necessary. +?? repeats the last search. + +.TP 8 +.RI \' lc +The +line previously marked by a +.I `k' +(mark) command, where +.I lc +is a lower case letter. + +.SS REGULAR EXPRESSIONS +Regular expressions are patterns used in selecting text. +For example, the +.B ed +command +.sp +.RS +g/\fIstring\fR/ +.RE +.sp +prints all lines containing +.IR string . +Regular expressions are also +used by the +.I `s' +command for selecting old text to be replaced with new. + +In addition to a specifying string literals, regular expressions can +represent +classes of strings. Strings thus represented are said to be matched +by the corresponding regular expression. +If it is possible for a regular expression +to match several strings in a line, then the left-most longest match is +the one selected. + +The following symbols are used in constructing regular expressions: + +.TP 8 +c +Any character +.I c +not listed below, including `{', '}', `(', `)', `<' and `>', +matches itself. + +.TP 8 +\fR\\\fIc\fR +Any backslash-escaped character +.IR c , +except for `{', '}', `(', `)', `<' and `>', +matches itself. + +.TP 8 +\fR.\fR +Matches any single character. + +.TP 8 +.I [char-class] +Matches any single character in +.IR char-class . +To include a `]' +in +.IR char-class , +it must be the first character. +A range of characters may be specified by separating the end characters +of the range with a `-', e.g., `a-z' specifies the lower case characters. +The following literal expressions can also be used in +.I char-class +to specify sets of characters: +.sp +\ \ [:alnum:]\ \ [:cntrl:]\ \ [:lower:]\ \ [:space:] +.PD 0 +\ \ [:alpha:]\ \ [:digit:]\ \ [:print:]\ \ [:upper:] +.PD 0 +\ \ [:blank:]\ \ [:graph:]\ \ [:punct:]\ \ [:xdigit:] +.sp +If `-' appears as the first or last +character of +.IR char-class , +then it matches itself. +All other characters in +.I char-class +match themselves. +.sp +Patterns in +.I char-class +of the form: +.sp +\ \ [.\fIcol-elm\fR.] or, +.PD 0 +\ \ [=\fIcol-elm\fR=] +.sp +where +.I col-elm +is a +.I collating element +are interpreted according to +.IR locale (5) +(not currently supported). +See +.IR regex (3) +for an explanation of these constructs. + +.TP 8 +[^\fIchar-class\fR] +Matches any single character, other than newline, not in +.IR char-class . +.IR char-class +is defined +as above. + +.TP 8 +^ +If `^' is the first character of a regular expression, then it +anchors the regular expression to the beginning of a line. +Otherwise, it matches itself. + +.TP 8 +$ +If `$' is the last character of a regular expression, it +anchors the regular expression to the end of a line. +Otherwise, it matches itself. + +.TP 8 +\fR\\<\fR +Anchors the single character regular expression or subexpression +immediately following it to the beginning of a word. +(This may not be available) + +.TP 8 +\fR\\>\fR +Anchors the single character regular expression or subexpression +immediately following it to the end of a word. +(This may not be available) + +.TP 8 +\fR\\(\fIre\fR\\)\fR +Defines a subexpression +.IR re . +Subexpressions may be nested. +A subsequent backreference of the form \fI`\\n'\fR, where +.I n +is a number in the range [1,9], expands to the text matched by the +.IR n th +subexpression. +For example, the regular expression `\\(.*\\)\\1' matches any string +consisting of identical adjacent substrings. +Subexpressions are ordered relative to +their left delimiter. + +.TP 8 +* +Matches the single character regular expression or subexpression +immediately preceding it zero or more times. If '*' is the first +character of a regular expression or subexpression, then it matches +itself. The `*' operator sometimes yields unexpected results. +For example, the regular expression `b*' matches the beginning of +the string `abbb' (as opposed to the substring `bbb'), since a null match +is the only left-most match. + +.TP 8 +\fR\\{\fIn,m\fR\\}\fR or \fR\\{\fIn,\fR\\}\fR or \fR\\{\fIn\fR\\}\fR +Matches the single character regular expression or subexpression +immediately preceding it at least +.I n +and at most +.I m +times. +If +.I m +is omitted, then it matches at least +.I n +times. +If the comma is also omitted, then it matches exactly +.I n +times. + +.LP +Additional regular expression operators may be defined depending on the +particular +.IR regex (3) +implementation. + +.SS COMMANDS +All +.B ed +commands are single characters, though some require additonal parameters. +If a command's parameters extend over several lines, then +each line except for the last +must be terminated with a backslash (\\). + +In general, at most one command is allowed per line. +However, most commands accept a print suffix, which is any of +.I `p' +(print), +.I `l' +(list) , +or +.I `n' +(enumerate), +to print the last line affected by the command. + +An interrupt (typically ^C) has the effect of aborting the current command +and returning the editor to command mode. + +.B ed +recognizes the following commands. The commands are shown together with +the default address or address range supplied if none is +specified (in parenthesis). + +.TP 8 +(.)a +Appends text to the buffer after the addressed line. +Text is entered in input mode. +The current address is set to last line entered. + +.TP 8 +(.,.)c +Changes lines in the buffer. The addressed lines are deleted +from the buffer, and text is appended in their place. +Text is entered in input mode. +The current address is set to last line entered. + +.TP 8 +(.,.)d +Deletes the addressed lines from the buffer. +If there is a line after the deleted range, then the current address is set +to this line. Otherwise the current address is set to the line +before the deleted range. + +.TP 8 +.RI e \ file +Edits +.IR file , +and sets the default filename. +If +.I file +is not specified, then the default filename is used. +Any lines in the buffer are deleted before +the new file is read. +The current address is set to the last line read. + +.TP 8 +.RI e \ !command +Edits the standard output of +.IR `!command' , +executed as described below. +The default filename is unchanged. +Any lines in the buffer are deleted before the output of +.I command +is read. +The current address is set to the last line read. + +.TP 8 +.RI E \ file +Edits +.I file +unconditionally. +This is similar to the +.I e +command, +except that unwritten changes are discarded without warning. +The current address is set to the last line read. + +.TP 8 +.RI f \ file +Sets the default filename to +.IR file . +If +.I file +is not specified, then the default unescaped filename is printed. + +.TP 8 +.RI (1,$)g /re/command-list +Applies +.I command-list +to each of the addressed lines matching a regular expression +.IR re . +The current address is set to the +line currently matched before +.I command-list +is executed. +At the end of the +.I `g' +command, the current address is set to the last line affected by +.IR command-list . + +Each command in +.I command-list +must be on a separate line, +and every line except for the last must be terminated by a backslash +(\\). +Any commands are allowed, except for +.IR `g' , +.IR `G' , +.IR `v' , +and +.IR `V' . +A newline alone in +.I command-list +is equivalent to a +.I `p' +command. + +.TP 8 +.RI (1,$)G /re/ +Interactively edits the addressed lines matching a regular expression +.IR re. +For each matching line, +the line is printed, +the current address is set, +and the user is prompted to enter a +.IR command-list . +At the end of the +.I `G' +command, the current address +is set to the last line affected by (the last) +.IR command-list . + +The format of +.I command-list +is the same as that of the +.I `g' +command. A newline alone acts as a null command list. +A single `&' repeats the last non-null command list. + +.TP 8 +H +Toggles the printing of error explanations. +By default, explanations are not printed. +It is recommended that ed scripts begin with this command to +aid in debugging. + +.TP 8 +h +Prints an explanation of the last error. + +.TP 8 +(.)i +Inserts text in the buffer before the current line. +Text is entered in input mode. +The current address is set to the last line entered. + +.TP 8 +(.,.+1)j +Joins the addressed lines. The addressed lines are +deleted from the buffer and replaced by a single +line containing their joined text. +The current address is set to the resultant line. + +.TP 8 +.RI (.)k lc +Marks a line with a lower case letter +.IR lc . +The line can then be addressed as +.I 'lc +(i.e., a single quote followed by +.I lc +) in subsequent commands. The mark is not cleared until the line is +deleted or otherwise modified. + +.TP 8 +(.,.)l +Prints the addressed lines unambiguously. +The current address is set to the last line +printed. + +.TP 8 +(.,.)m(.) +Moves lines in the buffer. The addressed lines are moved to after the +right-hand destination address, which may be the address +.IR 0 +(zero). +The current address is set to the +last line moved. + +.TP 8 +(.,.)n +Prints the addressed lines along with +their line numbers. The current address is set to the last line +printed. + +.TP 8 +(.,.)p +Prints the addressed lines. The current address is set to the last line +printed. + +.TP 8 +P +Toggles the command prompt on and off. +Unless a prompt was specified by with command-line option +\fI-p string\fR, the command prompt is by default turned off. + +.TP 8 +q +Quits ed. + +.TP 8 +Q +Quits ed unconditionally. +This is similar to the +.I q +command, +except that unwritten changes are discarded without warning. + +.TP 8 +.RI ($)r \ file +Reads +.I file +to after the addressed line. If +.I file +is not specified, then the default +filename is used. If there was no default filename prior to the command, +then the default filename is set to +.IR file . +Otherwise, the default filename is unchanged. +The current address is set to the last line read. + +.TP 8 +.RI ($)r \ !command +Reads +to after the addressed line +the standard output of +.IR `!command' , +executed as described below. +The default filename is unchanged. +The current address is set to the last line read. + +.HP +.RI (.,.)s /re/replacement/ +.PD 0 +.HP +.RI (.,.)s /re/replacement/\fRg\fR +.HP +.RI (.,.)s /re/replacement/n +.br +Replaces text in the addressed lines +matching a regular expression +.I re +with +.IR replacement . +By default, only the first match in each line is replaced. +The +.I `g' +(global) suffix causes every match to be replaced. +The +.I `n' +suffix, where +.I n +is a postive number, causes only the +.IR n th +match to be replaced. +It is an error if no substitutions are performed on any of the addressed +lines. +The current address is set the last line affected. + +.I re +and +.I replacement +may be delimited by any character other than space and newline. +If one or two of the last delimiters is omitted, then the last line +affected is printed as though the print suffix +.I `p' +were specified. + + +An unescaped `&' in +.I replacement +is replaced by the currently matched text. +The character sequence +\fI`\\m'\fR, +where +.I m +is a number in the range [1,9], is replaced by the +.IR m th +backreference expression of the matched text. +If +.I replacement +consists of a single `%', then +.I replacement +from the last substitution is used. +Newlines may be embedded in +.I replacement +if they are escaped with a backslash (\\). + +.TP 8 +(.,.)s +Repeats the last substitution. +This form of the +.I `s' +command may be suffixed with +any combination of the characters +.IR `r' , +.IR `g' , +and +.IR `p' . +The +.I `r' +suffix causes +the regular expression of the last search to be used instead of the +that of the last substitution. +The +.I `g' +suffix toggles the global suffix of the last substitution. +The +.I `p' +suffix toggles the print suffix of the last substitution +The current address is set to the last line affected. + +.TP 8 +(.,.)t(.) +Copies (i.e., transfers) the addressed lines to after the right-hand +destination address, which may be the address +.IR 0 +(zero). +The current address is set to the last line +copied. + +.TP 8 +u +Undoes the last command and restores the current address +to what it was before the command. +The global commands +.IR `g' , +.IR `G' , +.IR `v' , +and +.IR `V' . +are treated as a single command by undo. +.I `u' +is its own inverse. + +.TP 8 +.RI (1,$)v /pat/command-list +Applies +.I command-list +to each of the addressed lines not matching a regular expression +.IR re . +This is similar to the +.I `g' +command. + +.TP 8 +.RI (1,$)V /re/ +Interactively edits the addressed lines not matching a regular expression +.IR re. +This is similar to the +.I `G' +command. + +.TP 8 +.RI (1,$)w \ file +Writes the addressed lines to +.IR file . +Any previous contents of +.I file +is lost without warning. +If there is no default filename, then the default filename is set to +.IR file, +otherwise it is unchanged. If no filename is specified, then the default +filename is used. +The current address is unchanged. + +.TP 8 +.RI (1,$)wq \ file +Writes the addressed lines to +.IR file , +and then executes a +.I `q' +command. + +.TP 8 +.RI (1,$)w \ !command +Writes the addressed lines to the standard input of +.IR `!command' , +executed as described below. +The default filename and current address are unchanged. + +.TP 8 +.RI (1,$)W \ file +Appends the addressed lines to the end of +.IR file . +This is similar to the +.I `w' +command, expect that the previous contents of file is not clobbered. +The current address is unchanged. + +.TP 8 +x +Prompts for an encryption key which is used in subsequent reads and +writes. If a newline alone is entered as the key, then encryption is +turned off. Otherwise, echoing is disabled while a key is read. +Encryption/decryption is done using the bdes(1) algorithm. + +.TP 8 +.RI (.+1)z n +Scrolls +.I n +lines at a time starting at addressed line. If +.I n +is not specified, then the current window size is used. +The current address is set to the last line printed. + +.TP 8 +.RI ! command +Executes +.I command +via +.IR sh (1). +If the first character of +.I command +is `!', then it is replaced by text of the +previous +.IR `!command' . +.B ed +does not process +.I command +for backslash (\\) escapes. +However, an unescaped +.I `%' +is replaced by the default filename. +When the shell returns from execution, a `!' +is printed to the standard output. +The current line is unchanged. + +.TP 8 +.RI (.,.)! command +Replaces the addressed lines with the output of +.I `!command' +as described above. +The current address is set to the last line read. + +.TP 8 +($)= +Prints the line number of the addressed line. + +.TP 8 +(.+1)newline +Prints the addressed line, and sets the current address to +that line. + +.SH FILES +.TP 20 +/tmp/ed.* +Buffer file +.PD 0 +.TP 20 +\fR./ed.hup\fR, $HOME/ed.hup +First and second files to which +.B ed +attempts to write the buffer if the terminal hangs up. + +.SH SEE ALSO + +.IR vi (1), +.IR sed (1), +.IR regex (3), +.IR bdes (1), +.IR sh (1). + +USD:12-13 + +B. W. Kernighan and P. J. Plauger, +.I Software Tools in Pascal , +Addison-Wesley, 1981. + +.SH LIMITATIONS +.B ed +processes +.I file +arguments for backslash escapes, i.e., in a filename, +any characters preceded by a backslash (\\) are +interpreted literally. + +If a text (non-binary) file is not terminated by a newline character, +then +.B ed +appends one on reading/writing it. In the case of a binary file, +.B ed +does not append a newline on reading/writing. + +per line overhead: 4 ints + +.SH DIAGNOSTICS +When an error occurs, +.B ed +prints a `?' and either returns to command mode +or exits if its input is from a script. +An explanation of the last error can be +printed with the +.I `h' +(help) command. + +Since the +.I `g' +(global) command masks any errors from failed searches and substitutions, +it can be used to perform conditional operations in scripts; e.g., +.sp +.RS +g/\fIold\fR/s//\fInew\fR/ +.RE +.sp +replaces any occurrences of +.I old +with +.IR new . + +If diagnostics are not disabled, attempting to quit +.B ed +or edit another file before writing a modified buffer +results in an error. +If the command is entered a second time, it succeeds, +but any changes to the buffer are lost. diff --git a/bin/ed/ed.c b/bin/ed/ed.c new file mode 100644 index 0000000000..de226ff45c --- /dev/null +++ b/bin/ed/ed.c @@ -0,0 +1,2252 @@ +/* ed.c: This file contains the main control and user-interface routines + for the ed line editor. */ +/*- + * Copyright (c) 1993 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley + * by Andrew Moore, Talke Studio. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/*- + * Kernighan/Plauger, "Software Tools in Pascal," (c) 1981 by + * Addison-Wesley Publishing Company, Inc. Reprinted with permission of + * the publisher. + */ + +#ifndef lint +char copyright1[] = +"@(#) Copyright (c) 1993 The Regents of the University of California.\n\ + All rights reserved.\n"; +char copyright2[] = +"@(#) Kernighan/Plauger, Software Tools in Pascal, (c) 1981 by\n\ + Addison-Wesley Publishing Company, Inc. Reprinted with permission of\n\ + the publisher.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)ed.c 5.5 (Berkeley) 3/28/93"; +#endif /* not lint */ + +/* + * CREDITS + * The buf.c algorithm is attributed to Rodney Ruddock of + * the University of Guelph, Guelph, Ontario. + * + * The cbc.c encryption code is adapted from + * the bdes program by Matt Bishop of Dartmouth College, + * Hanover, NH. + * + * Addison-Wesley Publishing Company generously granted + * permission to distribute this program over Internet. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ed.h" + +#ifdef _POSIX_SOURCE +sigjmp_buf env; +#else +jmp_buf env; +#endif + +/* static buffers */ +char *shcmd; /* shell command buffer */ +int shcmdsz; /* shell command buffer size */ +int shcmdi; /* shell command buffer index */ +char *cvbuf; /* global command buffer */ +int cvbufsz; /* global command buffer size */ +char *lhbuf; /* lhs buffer */ +int lhbufsz; /* lhs buffer size */ +char *rhbuf; /* rhs buffer */ +int rhbufsz; /* rhs buffer size */ +int rhbufi; /* rhs buffer index */ +char *rbuf; /* regsub buffer */ +int rbufsz; /* regsub buffer size */ +char *sbuf; /* file i/o buffer */ +int sbufsz; /* file i/o buffer size */ +char *ibuf; /* ed command-line buffer */ +int ibufsz; /* ed command-line buffer size */ +char *ibufp; /* pointer to ed command-line buffer */ + +/* global flags */ +int isbinary; /* if set, buffer contains ASCII NULs */ +int modified; /* if set, buffer modified since last write */ +int garrulous = 0; /* if set, print all error messages */ +int scripted = 0; /* if set, suppress diagnostics */ +int des = 0; /* if set, use crypt(3) for i/o */ +int mutex = 0; /* if set, signals set "sigflags" */ +int sigflags = 0; /* if set, signals received while mutex set */ +int sigactive = 0; /* if set, signal handlers are enabled */ +int red = 0; /* if set, restrict shell/directory access */ + +char dfn[MAXFNAME + 1] = ""; /* default filename */ +long curln; /* current address */ +long lastln; /* last address */ +int lineno; /* script line number */ +char *prompt; /* command-line prompt */ +char *dps = "*"; /* default command-line prompt */ + +char *usage = "usage: %s [-] [-sx] [-p string] [name]\n"; + +extern char errmsg[]; +extern int optind; +extern char *optarg; + +/* ed: line editor */ +main(argc, argv) + int argc; + char **argv; +{ + int c, n; + long status = 0; + + red = (n = strlen(argv[0])) > 2 && argv[0][n - 3] == 'r'; +top: + while ((c = getopt(argc, argv, "p:sx")) != EOF) + switch(c) { + case 'p': /* set prompt */ + prompt = optarg; + break; + case 's': /* run script */ + scripted = 1; + break; + case 'x': /* use crypt */ +#ifdef DES + des = getkey(); +#else + fprintf(stderr, "crypt unavailable\n?\n"); +#endif + break; + + default: + fprintf(stderr, usage, argv[0]); + exit(1); + } + argv += optind; + argc -= optind; + if (argc && **argv == '-') { + scripted = 1; + if (argc > 1) { + optind = 1; + goto top; + } + argv++; + argc--; + } + /* assert: reliable signals! */ +#ifdef SIGWINCH + dowinch(SIGWINCH); + if (isatty(0)) signal(SIGWINCH, dowinch); +#endif + signal(SIGHUP, onhup); + signal(SIGQUIT, SIG_IGN); + signal(SIGINT, onintr); +#ifdef _POSIX_SOURCE + if (status = sigsetjmp(env, 1)) +#else + if (status = setjmp(env)) +#endif + { + fputs("\n?\n", stderr); + sprintf(errmsg, "interrupt"); + } else { + init_buf(); + sigactive = 1; /* enable signal handlers */ + if (argc && **argv && ckfn(*argv)) { + if (doread(0, *argv) < 0 && !isatty(0)) + quit(2); + else if (**argv != '!') + strcpy(dfn, *argv); + } else if (argc) { + fputs("?\n", stderr); + if (**argv == '\0') + sprintf(errmsg, "invalid filename"); + if (!isatty(0)) + quit(2); + } + } + for (;;) { + if (status < 0 && garrulous) + fprintf(stderr, "%s\n", errmsg); + if (prompt) { + printf("%s", prompt); + fflush(stdout); + } + if ((n = getline()) < 0) { + status = ERR; + continue; + } else if (n == 0) { + if (modified && !scripted) { + fputs("?\n", stderr); + sprintf(errmsg, "warning: file modified"); + if (!isatty(0)) { + fprintf(stderr, garrulous ? "script, line %d: %s\n" + : "", lineno, errmsg); + quit(2); + } + clearerr(stdin); + modified = 0; + status = EMOD; + continue; + } else + quit(0); + } else if (ibuf[n - 1] != '\n') { + /* discard line */ + sprintf(errmsg, "unexpected end-of-file"); + clearerr(stdin); + status = ERR; + continue; + } + if ((n = getlist()) >= 0 && (status = ckglob()) != 0) { + if (status > 0 && (status = doglob(status)) >= 0) { + curln = status; + continue; + } + } else if ((status = n) >= 0 && (status = docmd(0)) >= 0) { + if (!status || status + && (status = doprint(curln, curln, status)) >= 0) + continue; + } + switch (status) { + case EOF: + quit(0); + case EMOD: + modified = 0; + fputs("?\n", stderr); /* give warning */ + sprintf(errmsg, "warning: file modified"); + if (!isatty(0)) { + fprintf(stderr, garrulous ? "script, line %d: %s\n" : "", lineno, errmsg); + quit(2); + } + break; + case FATAL: + if (!isatty(0)) + fprintf(stderr, garrulous ? "script, line %d: %s\n" : "", lineno, errmsg); + else + fprintf(stderr, garrulous ? "%s\n" : "", errmsg); + quit(3); + default: + fputs("?\n", stderr); + if (!isatty(0)) { + fprintf(stderr, garrulous ? "script, line %d: %s\n" : "", lineno, errmsg); + quit(2); + } + break; + } + } + /*NOTREACHED*/ +} + + +long line1, line2, nlines; + +/* getlist: get line numbers from the command buffer until an illegal + address is seen. return range status */ +getlist() +{ + long num; + + nlines = line2 = 0; + while ((num = getone()) >= 0) { + line1 = line2; + line2 = num; + nlines++; + if (*ibufp != ',' && *ibufp != ';') + break; + else if (*ibufp++ == ';') + curln = num; + } + nlines = min(nlines, 2); + if (nlines == 0) + line2 = curln; + if (nlines <= 1) + line1 = line2; + return (num == ERR) ? ERR : nlines; +} + + +/* getone: return the next line number in the command buffer */ +long +getone() +{ + int c; + long i, num; + + if ((num = getnum(1)) < 0) + return num; + for (;;) { + c = isspace(*ibufp); + skipblanks(); + c = c && isdigit(*ibufp); + if (!c && *ibufp != '+' && *ibufp != '-' && *ibufp != '^') + break; + c = c ? '+' : *ibufp++; + if ((i = getnum(0)) < 0) { + sprintf(errmsg, "invalid address"); + return i; + } + if (c == '+') + num += i; + else num -= i; + } + if (num > lastln || num < 0) { + sprintf(errmsg, "invalid address"); + return ERR; + } + return num; +} + + +#define MAXMARK 26 /* max number of marks */ + +line_t *mark[MAXMARK]; /* line markers */ +int markno; /* line marker count */ + +/* getnum: return a relative line number from the command buffer */ +long +getnum(first) + int first; +{ + pattern_t *pat; + char c; + + skipblanks(); + if (isdigit(*ibufp)) + return strtol(ibufp, &ibufp, 10); + switch(c = *ibufp) { + case '.': + ibufp++; + return first ? curln : ERR; + case '$': + ibufp++; + return first ? lastln : ERR; + case '/': + case '?': + if ((pat = optpat()) == NULL) + return ERR; + else if (*ibufp == c) + ibufp++; + return first ? patscan(pat, (c == '/') ? 1 : 0) : ERR; + case '^': + case '-': + case '+': + return first ? curln : 1; + case '\'': + ibufp++; + return (first && islower(*ibufp)) ? getaddr(mark[*ibufp++ - 'a']) : ERR; + case '%': + case ',': + case ';': + if (first) { + ibufp++; + line2 = (c == ';') ? curln : 1; + nlines++; + return lastln; + } + return 1; + default: + return first ? EOF : 1; + } +} + + +/* gflags */ +#define GLB 001 /* global command */ +#define GPR 002 /* print after command */ +#define GLS 004 /* list after command */ +#define GNP 010 /* enumerate after command */ +#define GSG 020 /* global substitute */ + + +/* VRFYCMD: verify the command suffix in the command buffer */ +#define VRFYCMD() { \ + int done = 0; \ + do { \ + switch(*ibufp) { \ + case 'p': \ + gflag |= GPR, ibufp++; \ + break; \ + case 'l': \ + gflag |= GLS, ibufp++; \ + break; \ + case 'n': \ + gflag |= GNP, ibufp++; \ + break; \ + default: \ + done++; \ + } \ + } while (!done); \ + if (*ibufp++ != '\n') { \ + sprintf(errmsg, "invalid command suffix"); \ + return ERR; \ + } \ +} + + +/* ckglob: set lines matching a pattern in the command buffer; return + global status */ +ckglob() +{ + pattern_t *pat; + char c, delim; + char *s; + int nomatch; + long n; + line_t *lp; + int gflag = 0; /* print suffix of interactive cmd */ + + if ((c = *ibufp) == 'V' || c == 'G') + gflag = GLB; + else if (c != 'g' && c != 'v') + return 0; + if (ckrange(1, lastln) < 0) + return ERR; + else if ((delim = *++ibufp) == ' ' || delim == '\n') { + sprintf(errmsg, "invalid pattern delimiter"); + return ERR; + } else if ((pat = optpat()) == NULL) + return ERR; + else if (*ibufp == delim) + ibufp++; + if (gflag) + VRFYCMD(); /* get print suffix */ + for (lp = getlp(n = 1); n <= lastln; n++, lp = lp->next) { + if ((s = gettxt(lp)) == NULL) + return ERR; + lp->len &= ~ACTV; /* zero ACTV bit */ + if (isbinary) + s = nultonl(s, lp->len & ~ACTV); + if (line1 <= n && n <= line2 + && (!(nomatch = regexec(pat, s, 0, NULL, 0)) + && (c == 'g' || c == 'G') + || nomatch && (c == 'v' || c == 'V'))) + lp->len |= ACTV; + } + return gflag | GSG; +} + + +/* doglob: apply command list in the command buffer to the active + lines in a range; return command status */ +long +doglob(gflag) + int gflag; +{ + static char *ocmd = NULL; + static int ocmdsz = 0; + + line_t *lp = NULL; + long lc; + int status; + int n; + int interact = gflag & ~GSG; /* GLB & gflag ? */ + char *cmd = NULL; + +#ifdef BACKWARDS + if (!interact) + if (!strcmp(ibufp, "\n")) + cmd = "p\n"; /* null cmd-list == `p' */ + else if ((cmd = getcmdv(&n, 0)) == NULL) + return ERR; +#else + if (!interact && (cmd = getcmdv(&n, 0)) == NULL) + return ERR; +#endif + ureset(); + for (;;) { + for (lp = getlp(lc = 1); lc <= lastln; lc++, lp = lp->next) + if (lp->len & ACTV) /* active line */ + break; + if (lc > lastln) + break; + lp->len ^= ACTV; /* zero ACTV bit */ + curln = lc; + if (interact) { + /* print curln and get a command in global syntax */ + if (doprint(curln, curln, 0) < 0) + return ERR; + while ((n = getline()) > 0 + && ibuf[n - 1] != '\n') + clearerr(stdin); + if (n < 0) + return ERR; + else if (n == 0) { + sprintf(errmsg, "unexpected end-of-file"); + return ERR; + } else if (n == 1 && !strcmp(ibuf, "\n")) + continue; + else if (n == 2 && !strcmp(ibuf, "&\n")) { + if (cmd == NULL) { + sprintf(errmsg, "no previous command"); + return ERR; + } else cmd = ocmd; + } else if ((cmd = getcmdv(&n, 0)) == NULL) + return ERR; + else { + CKBUF(ocmd, ocmdsz, n + 1, ERR); + memcpy(ocmd, cmd, n + 1); + cmd = ocmd; + } + + } + ibufp = cmd; + for (; *ibufp;) + if ((status = getlist()) < 0 + || (status = docmd(1)) < 0 + || (status > 0 + && (status = doprint(curln, curln, status)) < 0)) + return status; + } + return ((interact & ~GLB ) && doprint(curln, curln, interact) < 0) ? ERR : curln; +} + + +#ifdef BACKWARDS +/* GETLINE3: get a legal address from the command buffer */ +#define GETLINE3(num) \ +{ \ + long ol1, ol2; \ +\ + ol1 = line1, ol2 = line2; \ + if (getlist() < 0) \ + return ERR; \ + else if (nlines == 0) { \ + sprintf(errmsg, "destination expected"); \ + return ERR; \ + } else if (line2 < 0 || lastln < line2) { \ + sprintf(errmsg, "invalid address"); \ + return ERR; \ + } \ + num = line2; \ + line1 = ol1, line2 = ol2; \ +} +#else /* BACKWARDS */ +/* GETLINE3: get a legal address from the command buffer */ +#define GETLINE3(num) \ +{ \ + long ol1, ol2; \ +\ + ol1 = line1, ol2 = line2; \ + if (getlist() < 0) \ + return ERR; \ + if (line2 < 0 || lastln < line2) { \ + sprintf(errmsg, "invalid address"); \ + return ERR; \ + } \ + num = line2; \ + line1 = ol1, line2 = ol2; \ +} +#endif + +/* sgflags */ +#define SGG 001 /* complement previous global substitute suffix */ +#define SGP 002 /* complement previous print suffix */ +#define SGR 004 /* use last regex instead of last pat */ +#define SGF 010 /* newline found */ + +long ucurln = -1; /* if >= 0, undo enabled */ +long ulastln = -1; /* if >= 0, undo enabled */ +int usw = 0; /* if set, undo last undo */ +int patlock = 0; /* if set, pattern not released by optpat() */ + +long rows = 22; /* scroll length: ws_row - 2 */ + +/* docmd: execute the next command in command buffer; return print + request, if any */ +docmd(glob) + int glob; +{ + static pattern_t *pat = NULL; + static int sgflag = 0; + + pattern_t *tpat; + char *fnp; + int gflag = 0; + int sflags = 0; + long num = 0; + int n = 0; + int c; + + skipblanks(); + switch(c = *ibufp++) { + case 'a': + VRFYCMD(); + if (!glob) ureset(); + if (append(line2, glob) < 0) + return ERR; + break; + case 'c': + if (ckrange(curln, curln) < 0) + return ERR; + VRFYCMD(); + if (!glob) ureset(); + if (lndelete(line1, line2) < 0 || append(curln, glob) < 0) + return ERR; + break; + case 'd': + if (ckrange(curln, curln) < 0) + return ERR; + VRFYCMD(); + if (!glob) ureset(); + if (lndelete(line1, line2) < 0) + return ERR; + else if (nextln(curln, lastln) != 0) + curln = nextln(curln, lastln); + modified = 1; + break; + case 'e': + if (modified && !scripted) + return EMOD; + /* fall through */ + case 'E': + if (nlines > 0) { + sprintf(errmsg, "unexpected address"); + return ERR; + } else if (!isspace(*ibufp)) { + sprintf(errmsg, "unexpected command suffix"); + return ERR; + } else if ((fnp = getfn()) == NULL) + return ERR; + VRFYCMD(); + memset(mark, 0, sizeof mark); + lndelete(1, lastln); + ureset(); + if (sbclose() < 0) + return ERR; + else if (sbopen() < 0) + return FATAL; + if (*fnp && *fnp != '!') strcpy(dfn, fnp); +#ifdef BACKWARDS + if (*fnp == '\0' && *dfn == '\0') { + sprintf(errmsg, "no current filename"); + return ERR; + } +#endif + if (doread(0, *fnp ? fnp : dfn) < 0) + return ERR; + ureset(); + modified = 0; + ucurln = ulastln = -1; + break; + case 'f': + if (nlines > 0) { + sprintf(errmsg, "unexpected address"); + return ERR; + } else if (!isspace(*ibufp)) { + sprintf(errmsg, "unexpected command suffix"); + return ERR; + } else if ((fnp = getfn()) == NULL) + return ERR; + else if (*fnp == '!') { + sprintf(errmsg, "invalid redirection"); + return ERR; + } + VRFYCMD(); + if (*fnp) strcpy(dfn, fnp); + printf("%s\n", esctos(dfn)); + break; + case 'g': + case 'G': + sprintf(errmsg, "cannot nest global commands"); + return ERR; + case 'h': + if (nlines > 0) { + sprintf(errmsg, "unexpected address"); + return ERR; + } + VRFYCMD(); + if (*errmsg) fprintf(stderr, "%s\n", errmsg); + break; + case 'H': + if (nlines > 0) { + sprintf(errmsg, "unexpected address"); + return ERR; + } + VRFYCMD(); + if ((garrulous = 1 - garrulous) && *errmsg) + fprintf(stderr, "%s\n", errmsg); + break; + case 'i': + if (line2 == 0) { + sprintf(errmsg, "invalid address"); + return ERR; + } + VRFYCMD(); + if (!glob) ureset(); + if (append(prevln(line2, lastln), glob) < 0) + return ERR; + break; + case 'j': + if (ckrange(curln, curln + 1) < 0) + return ERR; + VRFYCMD(); + if (!glob) ureset(); + if (line1 != line2 && join(line1, line2) < 0) + return ERR; + break; + case 'k': + c = *ibufp++; + if (line2 == 0) { + sprintf(errmsg, "invalid address"); + return ERR; + } else if (!islower(c)) { + sprintf(errmsg, "invalid mark character"); + return ERR; + } + VRFYCMD(); + if (!mark[c - 'a']) markno++; + mark[c - 'a'] = getlp(line2); + break; + case 'l': + if (ckrange(curln, curln) < 0) + return ERR; + VRFYCMD(); + if (doprint(line1, line2, gflag | GLS) < 0) + return ERR; + gflag = 0; + break; + case 'm': + if (ckrange(curln, curln) < 0) + return ERR; + GETLINE3(num); + if (line1 <= num && num < line2) { + sprintf(errmsg, "invalid destination"); + return ERR; + } + VRFYCMD(); + if (!glob) ureset(); + if (num == line1 - 1 || num == line2) + curln = line2; + else if (move(num) < 0) + return ERR; + else + modified = 1; + break; + case 'n': + if (ckrange(curln, curln) < 0) + return ERR; + VRFYCMD(); + if (doprint(line1, line2, gflag | GNP) < 0) + return ERR; + gflag = 0; + break; + case 'p': + if (ckrange(curln, curln) < 0) + return ERR; + VRFYCMD(); + if (doprint(line1, line2, gflag | GPR) < 0) + return ERR; + gflag = 0; + break; + case 'P': + if (nlines > 0) { + sprintf(errmsg, "unexpected address"); + return ERR; + } + VRFYCMD(); + prompt = prompt ? NULL : optarg ? optarg : dps; + break; + case 'q': + case 'Q': + if (nlines > 0) { + sprintf(errmsg, "unexpected address"); + return ERR; + } + VRFYCMD(); + gflag = (modified && !scripted && c == 'q') ? EMOD : EOF; + break; + case 'r': + if (!isspace(*ibufp)) { + sprintf(errmsg, "unexpected command suffix"); + return ERR; + } else if (nlines == 0) + line2 = lastln; + if ((fnp = getfn()) == NULL) + return ERR; + VRFYCMD(); + if (!glob) ureset(); + if (*dfn == '\0' && *fnp != '!') strcpy(dfn, fnp); +#ifdef BACKWARDS + if (*fnp == '\0' && *dfn == '\0') { + sprintf(errmsg, "no current filename"); + return ERR; + } +#endif + if ((num = doread(line2, *fnp ? fnp : dfn)) < 0) + return ERR; + else if (num && num != lastln) + modified = 1; + break; + case 's': + do { + switch(*ibufp) { + case '\n': + sflags |=SGF; + break; + case 'g': + sflags |= SGG; + ibufp++; + break; + case 'p': + sflags |= SGP; + ibufp++; + break; + case 'r': + sflags |= SGR; + ibufp++; + break; + default: + if (sflags) { + sprintf(errmsg, "invalid command suffix"); + return ERR; + } + } + } while (sflags && *ibufp != '\n'); + if (sflags && !pat) { + sprintf(errmsg, "no previous substitution"); + return ERR; + } else if (!(sflags & SGF)) + sgflag &= 0xff; + tpat = pat; + spl1(); + if ((!sflags || (sflags & SGR)) + && (tpat = optpat()) == NULL) + return ERR; + else if (tpat != pat) { + if (pat) { + regfree(pat); + free(pat); + } + pat = tpat; + patlock = 1; /* reserve pattern */ + } else if (pat == NULL) { + /* NOTREACHED */ + sprintf(errmsg, "no previous substitution"); + return ERR; + } + spl0(); + if (!sflags && (sgflag = getrhs(glob)) < 0) + return ERR; + else if (glob) + sgflag |= GLB; + else + sgflag &= ~GLB; + if (sflags & SGG) + sgflag ^= GSG; + if (sflags & SGP) + sgflag ^= GPR, sgflag &= ~(GLS | GNP); + do { + switch(*ibufp) { + case 'p': + sgflag |= GPR, ibufp++; + break; + case 'l': + sgflag |= GLS, ibufp++; + break; + case 'n': + sgflag |= GNP, ibufp++; + break; + default: + n++; + } + } while (!n); + if (ckrange(curln, curln) < 0) + return ERR; + VRFYCMD(); + if (!glob) ureset(); + if ((n = subst(pat, sgflag)) < 0) + return ERR; + else if (n) + modified = 1; + break; + case 't': + if (ckrange(curln, curln) < 0) + return ERR; + GETLINE3(num); + VRFYCMD(); + if (!glob) ureset(); + if (transfer(num) < 0) + return ERR; + modified = 1; + break; + case 'u': + if (nlines > 0) { + sprintf(errmsg, "unexpected address"); + return ERR; + } + VRFYCMD(); + if (undo() < 0) + return ERR; + break; + case 'v': + case 'V': + sprintf(errmsg, "cannot nest global commands"); + return ERR; + case 'w': + case 'W': + if ((n = *ibufp) == 'q' || n == 'Q') { + gflag = EOF; + ibufp++; + } + if (!isspace(*ibufp)) { + sprintf(errmsg, "unexpected command suffix"); + return ERR; + } else if ((fnp = getfn()) == NULL) + return ERR; + if (nlines == 0 && !lastln) + line1 = line2 = 0; + else if (ckrange(1, lastln) < 0) + return ERR; + VRFYCMD(); + if (*dfn == '\0' && *fnp != '!') strcpy(dfn, fnp); +#ifdef BACKWARDS + if (*fnp == '\0' && *dfn == '\0') { + sprintf(errmsg, "no current filename"); + return ERR; + } +#endif + if ((num = dowrite(line1, line2, *fnp ? fnp : dfn, (c == 'W') ? "a" : "w")) < 0) + return ERR; + else if (num == lastln) + modified = 0; + else if (modified && !scripted && n == 'q') + gflag = EMOD; + break; + case 'x': + if (nlines > 0) { + sprintf(errmsg, "unexpected address"); + return ERR; + } + VRFYCMD(); +#ifdef DES + des = getkey(); +#else + sprintf(errmsg, "crypt unavailable"); + return ERR; +#endif + break; + case 'z': +#ifdef BACKWARDS + if (ckrange(line1 = 1, curln + 1) < 0) +#else + if (ckrange(line1 = 1, curln + !glob) < 0) +#endif + return ERR; + else if ('0' < *ibufp && *ibufp <= '9') + rows = strtol(ibufp, &ibufp, 10); + VRFYCMD(); + if (doprint(line2, min(lastln, line2 + rows - 1), gflag) < 0) + return ERR; + gflag = 0; + break; + case '=': + VRFYCMD(); + printf("%d\n", nlines ? line2 : lastln); + break; + case '!': +#ifndef VI_BANG + if (nlines > 0) { + sprintf(errmsg, "unexpected address"); + return ERR; + } +#endif + if ((sflags = getshcmd()) < 0) + return ERR; + VRFYCMD(); + if (sflags) printf("%s\n", shcmd + 1); +#ifdef VI_BANG + if (nlines == 0) { +#endif + system(shcmd + 1); + if (!scripted) printf("!\n"); + break; +#ifdef VI_BANG + } + if (!lastln && !line1 && !line2) { + if (!glob) ureset(); + } else if (ckrange(curln, curln) < 0) + return ERR; + else { + if (!glob) ureset(); + if (lndelete(line1, line2) < 0) + return ERR; + line2 = curln; + modified = 1; + } + if ((num = doread(line2, shcmd)) < 0) + return ERR; + else if (num && num != lastln) + modified = 1; + break; +#endif + case '\n': +#ifdef BACKWARDS + if (ckrange(line1 = 1, curln + 1) < 0 +#else + if (ckrange(line1 = 1, curln + !glob) < 0 +#endif + || doprint(line2, line2, 0) < 0) + return ERR; + break; + default: + sprintf(errmsg, "unknown command"); + return ERR; + } + return gflag; +} + + +/* ckrange: return status of line number range check */ +ckrange(def1, def2) + long def1, def2; +{ + if (nlines == 0) { + line1 = def1; + line2 = def2; + } + if (line1 > line2 || 1 > line1 || line2 > lastln) { + sprintf(errmsg, "invalid address"); + return ERR; + } + return 0; +} + + +/* patscan: return the number of the next line matching a pattern in a + given direction. wrap around begin/end of line queue if necessary */ +long +patscan(pat, dir) + pattern_t *pat; + int dir; +{ + char *s; + long n = curln; + line_t *lp; + + do { + if (n = dir ? nextln(n, lastln) : prevln(n, lastln)) { + if ((s = gettxt(lp = getlp(n))) == NULL) + return ERR; + if (isbinary) + s = nultonl(s, lp->len & ~ACTV); + if (!regexec(pat, s, 0, NULL, 0)) + return n; + } + } while (n != curln); + sprintf(errmsg, "no match"); + return ERR; +} + + +/* getfn: return pointer to copy of filename in the command buffer */ +char * +getfn() +{ + static char *file = NULL; + static int filesz = 0; + + int n; + + if (*ibufp != '\n') { + skipblanks(); + if (*ibufp == '\n') { + sprintf(errmsg, "invalid filename"); + return NULL; + } else if ((ibufp = getcmdv(&n, 1)) == NULL) + return NULL; +#ifdef VI_BANG + else if (*ibufp == '!') { + ibufp++; + if ((n = getshcmd()) < 0) + return NULL; + if (n) printf("%s\n", shcmd + 1); + return shcmd; + } +#endif + else if (n - 1 > MAXFNAME) { + sprintf(errmsg, "filename too long"); + return NULL; + } + } +#ifndef BACKWARDS + else if (*dfn == '\0') { + sprintf(errmsg, "no current filename"); + return NULL; + } +#endif + CKBUF(file, filesz, MAXFNAME + 1, NULL); + for (n = 0; *ibufp != '\n';) + file[n++] = *ibufp++; + file[n] = '\0'; + return ckfn(file); +} + + +/* getrhs: extract substitution template from the command buffer */ +getrhs(glob) + int glob; +{ + char delim; + + if ((delim = *ibufp) == '\n') { + rhbufi = 0; + return GPR; + } else if (makesub(glob) == NULL) + return ERR; + else if (*ibufp == '\n') + return GPR; + else if (*ibufp == delim) + ibufp++; + if ('1' <= *ibufp && *ibufp <= '9') + return (int) strtol(ibufp, &ibufp, 10) << 8; + else if (*ibufp == 'g') { + ibufp++; + return GSG; + } + return 0; +} + + +/* makesub: return pointer to copy of substitution template in the command + buffer */ +char * +makesub(glob) + int glob; +{ + int n = 0; + int i = 0; + char delim = *ibufp++; + char c; + + if (*ibufp == '%' && *(ibufp + 1) == delim) { + ibufp++; + if (!rhbuf) sprintf(errmsg, "no previous substitution"); + return rhbuf; + } + while (*ibufp != delim) { + CKBUF(rhbuf, rhbufsz, i + 2, NULL); + if ((c = rhbuf[i++] = *ibufp++) == '\n' && *ibufp == '\0') { + i--, ibufp--; + break; + } else if (c != '\\') + ; + else if ((rhbuf[i++] = *ibufp++) != '\n') + ; + else if (!glob) { + while ((n = getline()) == 0 + || n > 0 && ibuf[n - 1] != '\n') + clearerr(stdin); + if (n < 0) + return NULL; + } else + /*NOTREACHED*/ + ; + } + CKBUF(rhbuf, rhbufsz, i + 1, NULL); + rhbuf[rhbufi = i] = '\0'; + return rhbuf; +} + + +/* getshcmd: read a shell command up a maximum size from stdin; return + substitution status */ +int +getshcmd() +{ + static char *buf = NULL; + static int n = 0; + + char *s; /* substitution char pointer */ + int i = 0; + int j = 0; + + if (red) { + sprintf(errmsg, "shell access restricted"); + return ERR; + } else if ((s = ibufp = getcmdv(&j, 1)) == NULL) + return ERR; + CKBUF(buf, n, j + 1, ERR); + buf[i++] = '!'; /* prefix command w/ bang */ + while (*ibufp != '\n') + switch (*ibufp) { + default: + CKBUF(buf, n, i + 2, ERR); + buf[i++] = *ibufp; + if (*ibufp++ == '\\') + buf[i++] = *ibufp++; + break; + case '!': + if (s != ibufp) { + CKBUF(buf, n, i + 1, ERR); + buf[i++] = *ibufp++; + } +#ifdef BACKWARDS + else if (shcmd == NULL || *(shcmd + 1) == '\0') +#else + else if (shcmd == NULL) +#endif + { + sprintf(errmsg, "no previous command"); + return ERR; + } else { + CKBUF(buf, n, i + shcmdi, ERR); + for (s = shcmd + 1; s < shcmd + shcmdi;) + buf[i++] = *s++; + s = ibufp++; + } + break; + case '%': + if (*dfn == '\0') { + sprintf(errmsg, "no current filename"); + return ERR; + } + j = strlen(s = esctos(dfn)); + CKBUF(buf, n, i + j, ERR); + while (j--) + buf[i++] = *s++; + s = ibufp++; + break; + } + CKBUF(shcmd, shcmdsz, i + 1, ERR); + memcpy(shcmd, buf, i); + shcmd[shcmdi = i] = '\0'; + return *s == '!' || *s == '%'; +} + + +/* append: insert text from stdin to after line n; stop when either a + single period is read or EOF; return status */ +append(n, glob) + long n; + int glob; +{ + int l; + char *lp = ibuf; + char *eot; + undo_t *up = NULL; + + for (curln = n;;) { + if (!glob) { + if ((l = getline()) < 0) + return ERR; + else if (l == 0 || ibuf[l - 1] != '\n') { + clearerr(stdin); + return l ? EOF : 0; + } + lp = ibuf; + } else if (*(lp = ibufp) == '\0') + return 0; + else { + while (*ibufp++ != '\n') + ; + l = ibufp - lp; + } + if (l == 2 && lp[0] == '.' && lp[1] == '\n') { + return 0; + } + eot = lp + l; + spl1(); + do { + if ((lp = puttxt(lp)) == NULL) { + spl0(); + return ERR; + } else if (up) + up->t = getlp(curln); + else if ((up = upush(UADD, curln, curln)) == NULL) { + spl0(); + return ERR; + } + } while (lp != eot); + spl0(); + modified = 1; + } +} + + +#ifdef sun +/* subst: change all text matching a pattern in a range of lines according to + a substitution template; return status */ +subst(pat, gflag) + pattern_t *pat; + int gflag; +{ + undo_t *up = NULL; + char *txt; + char *eot; + line_t *bp, *ep, *np; + long ocl; + long nsubs = 0; + int len; + + ep = getlp(curln = line2); + for (bp = getlp(line1); bp != ep->next; bp = bp->next) + if ((len = regsub(pat, bp, gflag)) < 0) + return ERR; + else if (!len) { + /* add copy of bp after current line - this avoids + overloading the undo structure, since only two + undo nodes are needed for the whole substitution; + the cost is high, but the less than if undo is + overloaded on a Sun evidently. XXX */ + if ((np = lpdup(bp)) == NULL) + return ERR; + spl1(); + lpqueue(np); + if (up) + up->t = getlp(curln); + else if ((up = upush(UADD, curln, curln)) == NULL) { + spl0(); + return ERR; + } + spl0(); + } else { + txt = rbuf; + eot = rbuf + len; + spl1(); + do { + if ((txt = puttxt(txt)) == NULL) { + spl0(); + return ERR; + } else if (up) + up->t = getlp(curln); + else if ((up = upush(UADD, curln, curln)) == NULL) { + spl0(); + return ERR; + } + } while (txt != eot); + spl0(); + nsubs++; + } + ocl = curln; + lndelete(line1, line2); + curln = ocl - (line2 - line1 + 1); + if (nsubs == 0 && !(gflag & GLB)) { + sprintf(errmsg, "no match"); + return ERR; + } else if ((gflag & (GPR | GLS | GNP)) + && doprint(curln, curln, gflag) < 0) + return ERR; + return 1; +} +#else /* sun */ + + +/* subst: change all text matching a pattern in a range of lines according to + a substitution template; return status */ +subst(pat, gflag) + pattern_t *pat; + int gflag; +{ + undo_t *up; + char *txt; + char *eot; + long lc; + int nsubs = 0; + line_t *lp; + int len; + + curln = prevln(line1, lastln); + for (lc = 0; lc <= line2 - line1; lc++) { + lp = getlp(curln = nextln(curln, lastln)); + if ((len = regsub(pat, lp, gflag)) < 0) + return ERR; + else if (len) { + up = NULL; + lndelete(curln, curln); + txt = rbuf; + eot = rbuf + len; + spl1(); + do { + if ((txt = puttxt(txt)) == NULL) { + spl0(); + return ERR; + } else if (up) + up->t = getlp(curln); + else if ((up = upush(UADD, curln, curln)) == NULL) { + spl0(); + return ERR; + } + } while (txt != eot); + spl0(); + nsubs++; + } + } + if (nsubs == 0 && !(gflag & GLB)) { + sprintf(errmsg, "no match"); + return ERR; + } else if ((gflag & (GPR | GLS | GNP)) + && doprint(curln, curln, gflag) < 0) + return ERR; + return 1; +} +#endif /* sun */ + + +/* regsub: replace text matched by a pattern according to a substitution + template; return pointer to the modified text */ +regsub(pat, lp, gflag) + pattern_t *pat; + line_t *lp; + int gflag; +{ + int off = 0; + int kth = gflag >> 8; /* substitute kth match only */ + int chngd = 0; + int matchno = 0; + int len; + int i = 0; + regmatch_t rm[SE_MAX]; + char *txt; + char *eot; + + if ((txt = gettxt(lp)) == NULL) + return ERR; + len = lp->len & ~ACTV; + eot = txt + len; + if (isbinary) txt = nultonl(txt, len); + if (!regexec(pat, txt, SE_MAX, rm, 0)) { + do { + if (!kth || kth == ++matchno) { + chngd++; + i = rm[0].rm_so; + CKBUF(rbuf, rbufsz, off + i, ERR); + if (isbinary) txt = nltonul(txt, rm[0].rm_eo); + memcpy(rbuf + off, txt, i); + if ((off = catsub(txt, rm, off += i)) < 0) + return ERR; + } else { + i = rm[0].rm_eo; + CKBUF(rbuf, rbufsz, off + i, ERR); + if (isbinary) txt = nltonul(txt, i); + memcpy(rbuf + off, txt, i); + off += i; + } + txt += rm[0].rm_eo; + } while (*txt && (!chngd || (gflag & GSG) && rm[0].rm_eo) + && !regexec(pat, txt, SE_MAX, rm, REG_NOTBOL)); + i = eot - txt; + CKBUF(rbuf, rbufsz, off + i + 2, ERR); + if (i > 0 && !rm[0].rm_eo && (gflag & GSG)) { + sprintf(errmsg, "infinite substitution loop"); + return ERR; + } + if (isbinary) txt = nltonul(txt, i); + memcpy(rbuf + off, txt, i); + memcpy(rbuf + off + i, "\n", 2); + } + return chngd ? off + i + 1 : 0; +} + + +/* join: replace a range of lines with the joined text of those lines */ +join(from, to) + long from; + long to; +{ + static char *buf = NULL; + static int n; + + char *s; + int len = 0; + int size = 0; + line_t *bp, *ep; + + ep = getlp(nextln(to, lastln)); + for (bp = getlp(from); bp != ep; bp = bp->next, size += len) { + if ((s = gettxt(bp)) == NULL) + return ERR; + len = bp->len & ~ACTV; + CKBUF(buf, n, size + len, ERR); + memcpy(buf + size, s, len); + } + CKBUF(buf, n, size + 2, ERR); + memcpy(buf + size, "\n", 2); + lndelete(from, to); + curln = from - 1; + spl1(); + if (puttxt(buf) == NULL + || upush(UADD, curln, curln) == NULL) { + spl0(); + return ERR; + } + spl0(); + modified = 1; + return 0; +} + + +/* move: move a range of lines */ +move(num) + long num; +{ + line_t *b1, *a1, *b2, *a2; + long n = nextln(line2, lastln); + long p = prevln(line1, lastln); + + spl1(); + if (upush(UMOV, p, n) == NULL + || upush(UMOV, num, nextln(num, lastln)) == NULL) { + spl0(); + return ERR; + } + a1 = getlp(n); + if (num < line1) + b1 = getlp(p), b2 = getlp(num); /* this getlp last! */ + else b2 = getlp(num), b1 = getlp(p); /* this getlp last! */ + a2 = b2->next; + requeue(b2, b1->next); + requeue(a1->prev, a2); + requeue(b1, a1); + curln = num + ((num < line1) ? line2 - line1 + 1 : 0); + spl0(); + return 0; +} + + +/* transfer: copy a range of lines; return status */ +transfer(num) + long num; +{ + line_t *lp; + long nl, nt, lc; + long mid = (num < line2) ? num : line2; + undo_t *up = NULL; + + curln = num; + for (nt = 0, nl = line1; nl <= mid; nl++, nt++) { + spl1(); + if ((lp = lpdup(getlp(nl))) == NULL) { + spl0(); + return ERR; + } + lpqueue(lp); + if (up) + up->t = lp; + else if ((up = upush(UADD, curln, curln)) == NULL) { + spl0(); + return ERR; + } + spl0(); + } + for (nl += nt, lc = line2 + nt; nl <= lc; nl += 2, lc++) { + spl1(); + if ((lp = lpdup(getlp(nl))) == NULL) { + spl0(); + return ERR; + } + lpqueue(lp); + if (up) + up->t = lp; + else if ((up = upush(UADD, curln, curln)) == NULL) { + spl0(); + return ERR; + } + spl0(); + } + return 0; +} + + +/* lndelete: delete a range of lines */ +lndelete(from, to) + long from, to; +{ + line_t *before, *after; + + spl1(); + if (upush(UDEL, from, to) == NULL) { + spl0(); + return ERR; + } + after = getlp(nextln(to, lastln)); + before = getlp(prevln(from, lastln)); /* this getlp last! */ + requeue(before, after); + lastln -= to - from + 1; + curln = prevln(from, lastln); + spl0(); + return 0; +} + + +/* catsub: modify text according to a substitution template; + return offset to end of modified text */ +catsub(boln, rm, off) + char *boln; + regmatch_t *rm; + int off; +{ + int j = 0; + int k = 0; + char *sub = rhbuf; + + for (; sub - rhbuf < rhbufi; sub++) + if (*sub == '&') { + j = rm[0].rm_so; + k = rm[0].rm_eo; + CKBUF(rbuf, rbufsz, off + k - j, ERR); + while (j < k) + rbuf[off++] = boln[j++]; + } else if (*sub == '\\' && '1' <= *++sub && *sub <= '9' + && rm[*sub - '0'].rm_so >= 0 + && rm[*sub - '0'].rm_eo >= 0) { + j = rm[*sub - '0'].rm_so; + k = rm[*sub - '0'].rm_eo; + CKBUF(rbuf, rbufsz, off + k - j, ERR); + while (j < k) + rbuf[off++] = boln[j++]; + } else { + CKBUF(rbuf, rbufsz, off + 1, ERR); + rbuf[off++] = *sub; + } + CKBUF(rbuf, rbufsz, off + 1, ERR); + rbuf[off] = '\0'; + return off; +} + +/* doprint: print a range of lines to stdout */ +doprint(from, to, gflag) + long from; + long to; + int gflag; +{ + line_t *bp; + line_t *ep; + char *s; + + if (!from) { + sprintf(errmsg, "invalid address"); + return ERR; + } + ep = getlp(nextln(to, lastln)); + for (bp = getlp(from); bp != ep; bp = bp->next) { + if ((s = gettxt(bp)) == NULL) + return ERR; + putstr(s, bp->len & ~ACTV, curln = from++, gflag); + } + return 0; +} + + +int cols = 72; /* wrap column: ws_col - 8 */ + +/* putstr: print text to stdout */ +void +putstr(s, l, n, gflag) + char *s; + int l; + long n; + int gflag; +{ + int col = 0; + + if (gflag & GNP) { + printf("%ld\t", n); + col = 8; + } + for (; l--; s++) { + if ((gflag & GLS) && ++col > cols) { + fputs("\\\n", stdout); + col = 1; + } + if (gflag & GLS) { + switch (*s) { + case '\b': + fputs("\\b", stdout); + break; + case '\f': + fputs("\\f", stdout); + break; + case '\n': + fputs("\\n", stdout); + break; + case '\r': + fputs("\\r", stdout); + break; + case '\t': + fputs("\\t", stdout); + break; + case '\v': + fputs("\\v", stdout); + break; + default: + if (*s < 32 || 126 < *s) { + putchar('\\'); + putchar((((unsigned char) *s & 0300) >> 6) + '0'); + putchar((((unsigned char) *s & 070) >> 3) + '0'); + putchar(((unsigned char) *s & 07) + '0'); + col += 2; + } else if (*s == '\\') + fputs("\\\\", stdout); + else { + putchar(*s); + col--; + } + } + col++; + } else + putchar(*s); + } +#ifndef BACKWARDS + if (gflag & GLS) + putchar('$'); +#endif + putchar('\n'); +} + + +int newline_added; /* set if newline appended to input file */ + +/* doread: read a text file into the editor buffer; return line count */ +long +doread(n, fn) + long n; + char *fn; +{ + FILE *fp; + line_t *lp = getlp(n); + unsigned long size = 0; + undo_t *up = NULL; + int len; + + isbinary = newline_added = 0; + if ((fp = (*fn == '!') ? popen(fn + 1, "r") : fopen(esctos(fn), "r")) == NULL) { + fprintf(stderr, "%s: %s\n", fn, strerror(errno)); + sprintf(errmsg, "cannot open input file"); + return ERR; + } else if (des) + desinit(); + for (curln = n; (len = sgetline(fp)) > 0; size += len) { + spl1(); + if (puttxt(sbuf) == NULL) { + spl0(); + return ERR; + } + lp = lp->next; + if (up) + up->t = lp; + else if ((up = upush(UADD, curln, curln)) == NULL) { + spl0(); + return ERR; + } + spl0(); + } + if (((*fn == '!') ? pclose(fp) : fclose(fp)) < 0) { + fprintf(stderr, "%s: %s\n", fn, strerror(errno)); + sprintf(errmsg, "cannot close input file"); + return ERR; + } + if (newline_added && !isbinary) + fputs("newline appended\n", stderr); + if (des) size += 8 - size % 8; + fprintf(stderr, !scripted ? "%lu\n" : "", size); + return (len < 0) ? ERR : curln - n; +} + + +/* dowrite: write the text of a range of lines to a file; return line count */ +long +dowrite(n, m, fn, mode) + long n; + long m; + char *fn; + char *mode; +{ + FILE *fp; + line_t *lp; + unsigned long size = 0; + long lc = n ? m - n + 1 : 0; + char *s = NULL; + int len; + int ct; + + if ((fp = ((*fn == '!') ? popen(fn + 1, "w") : fopen(esctos(fn), mode))) == NULL) { + fprintf(stderr, "%s: %s\n", fn, strerror(errno)); + sprintf(errmsg, "cannot open output file"); + return ERR; + } else if (des) + desinit(); + if (n && !des) + for (lp = getlp(n); n <= m; n++, lp = lp->next) { + if ((s = gettxt(lp)) == NULL) + return ERR; + len = lp->len & ~ACTV; + if (n != lastln || !isbinary || !newline_added) + s[len++] = '\n'; + if ((ct = fwrite(s, sizeof(char), len, fp)) < 0 || ct != len) { + fprintf(stderr, "%s: %s\n", fn, strerror(errno)); + sprintf(errmsg, "cannot write file"); + return ERR; + } + size += len; + } + else if (n) + for (lp = getlp(n); n <= m; n++, lp = lp->next) { + if ((s = gettxt(lp)) == NULL) + return ERR; + len = lp->len & ~ACTV; + while (len--) { + if (desputc(*s++, fp) == EOF && ferror(fp)) { + fprintf(stderr, "%s: %s\n", fn, strerror(errno)); + sprintf(errmsg, "cannot write file"); + return ERR; + } + } + if (n != lastln || !isbinary || !newline_added) { + if (desputc('\n', fp) < 0) { + fprintf(stderr, "%s: %s\n", fn, strerror(errno)); + sprintf(errmsg, "cannot write file"); + return ERR; + } + size++; /* for '\n' */ + } + size += (lp->len & ~ACTV); + } + if (des) { + desflush(fp); /* flush buffer */ + size += 8 - size % 8; /* adjust DES size */ + } + if (((*fn == '!') ? pclose(fp) : fclose(fp)) < 0) { + fprintf(stderr, "%s: %s\n", fn, strerror(errno)); + sprintf(errmsg, "cannot close output file"); + return ERR; + } + fprintf(stderr, !scripted ? "%lu\n" : "", size); + return lc; +} + + +#define USIZE 100 /* undo stack size */ +undo_t *ustack = NULL; /* undo stack */ +long usize = 0; /* stack size variable */ +long u_p = 0; /* undo stack pointer */ + + +/* upush: return pointer to intialized undo node */ +undo_t * +upush(type, from, to) + int type; + long from; + long to; +{ + undo_t *t; + +#if defined(sun) || defined(NO_REALLOC_NULL) + if (ustack == NULL + && (ustack = (undo_t *) malloc((usize = USIZE) * sizeof(undo_t))) == NULL) { + fprintf(stderr, "%s\n", strerror(errno)); + sprintf(errmsg, "out of memory"); + return NULL; + } +#endif + t = ustack; + if (u_p < usize + || (t = (undo_t *) realloc(ustack, (usize += USIZE) * sizeof(undo_t))) != NULL) { + ustack = t; + ustack[u_p].type = type; + ustack[u_p].t = getlp(to); + ustack[u_p].h = getlp(from); + return ustack + u_p++; + } + /* out of memory - release undo stack */ + fprintf(stderr, "%s\n", strerror(errno)); + sprintf(errmsg, "out of memory"); + ureset(); + free(ustack); + ustack = NULL; + usize = 0; + return NULL; +} + +/* undo: undo last change to the editor buffer */ +undo() +{ + long n = usw ? 0 : u_p - 1; + int i = usw ? 1 : -1; + long j = u_p; + long ocurln = curln; + long olastln = lastln; + + if (ucurln == -1 || ulastln == -1) { + sprintf(errmsg, "nothing to undo"); + return ERR; + } else if (u_p) + modified = 1; + getlp(0); /* this getlp last! */ + spl1(); + for (; j-- > 0; n += i) + switch(ustack[n].type ^ usw) { + case UADD: + requeue(ustack[n].h->prev, ustack[n].t->next); + break; + case UDEL: + requeue(ustack[n].h->prev, ustack[n].h); + requeue(ustack[n].t, ustack[n].t->next); + break; + case UMOV: + case VMOV: + requeue(ustack[n + i].h, ustack[n].h->next); + requeue(ustack[n].t->prev, ustack[n + i].t); + requeue(ustack[n].h, ustack[n].t); + n += i, j--; + break; + default: + /*NOTREACHED*/ + ; + } + usw = 1 - usw; + curln = ucurln, ucurln = ocurln; + lastln = ulastln, ulastln = olastln; + spl0(); + return 0; +} + + +/* ureset: clear the undo stack */ +void +ureset() +{ + line_t *lp, *ep, *tl; + int i; + + while (u_p--) + if ((ustack[u_p].type ^ usw) == UDEL) { + ep = ustack[u_p].t->next; + for (lp = ustack[u_p].h; lp != ep; lp = tl) { + if (markno) + for (i = 0; i < MAXMARK; i++) + if (mark[i] == lp) { + mark[i] = NULL; + markno--; + } + tl = lp->next; + free(lp); + } + } + u_p = usw = 0; + ucurln = curln; + ulastln = lastln; +} + + + +/* sgetline: read a line of text up a maximum size from a file; return + line length */ +sgetline(fp) + FILE *fp; +{ + register int c; + register int i = 0; + + while (((c = des ? desgetc(fp) : getc(fp)) != EOF || !feof(fp) && !ferror(fp)) && c != '\n') { + CKBUF(sbuf, sbufsz, i + 1, ERR); + if (!(sbuf[i++] = c)) isbinary = 1; + } + CKBUF(sbuf, sbufsz, i + 2, ERR); + if (c == '\n') + sbuf[i++] = c; + else if (feof(fp) && i) { + sbuf[i++] = '\n'; + newline_added = 1; + } else if (ferror(fp)) { + fprintf(stderr, "%s\n", strerror(errno)); + sprintf(errmsg, "cannot read input file"); + return ERR; + } + sbuf[i] = '\0'; + return (isbinary && newline_added && i) ? --i : i; +} + + +/* getline: read a line of text up a maximum size from stdin; return + line length */ +getline() +{ + register int i = 0; + register int oi = 0; + char c; + + /* Read one character at a time to avoid i/o contention with shell + escapes invoked by nonterminal input, e.g., + ed - <seek = lp->seek; + np->len = (lp->len & ~ACTV); /* zero ACTV bit */ + return np; +} + + +/* oddesc: return the parity of escapes preceding a character in a + string */ +oddesc(s, t) + char *s; + char *t; +{ + return (s == t || *(t - 1) != '\\') ? 0 : !oddesc(s, t - 1); +} + + +/* esctos: return copy of escaped string */ +char * +esctos(s) + char *s; +{ + static char *file = NULL; + static int filesz = 0; + + int i = 0; + + CKBUF(file, filesz, MAXFNAME + 1, NULL); + /* assert: no trailing escape */ + while (file[i++] = (*s == '\\') ? *++s : *s) + s++; + return file; +} + + +void +onhup(signo) + int signo; +{ + if (mutex) + sigflags |= (1 << signo); + else dohup(signo); +} + + +void +onintr(signo) + int signo; +{ + if (mutex) + sigflags |= (1 << signo); + else dointr(signo); +} + + + +void +dohup(signo) + int signo; +{ + char *hup = NULL; /* hup filename */ + char *s; + int n; + + if (!sigactive) + quit(1); + sigflags &= ~(1 << signo); + if (lastln && dowrite(1, lastln, "ed.hup", "w") < 0 + && (s = getenv("HOME")) != NULL + && (n = strlen(s)) + 8 <= MAXFNAME /* "ed.hup" + '/' */ + && (hup = (char *) malloc(n + 10)) != NULL) { + strcpy(hup, s); + if (hup[n - 1] != '/') + hup[n] = '/', hup[n+1] = '\0'; + strcat(hup, "ed.hup"); + dowrite(1, lastln, hup, "w"); + } + quit(2); +} + + +void +dointr(signo) + int signo; +{ + if (!sigactive) + quit(1); + sigflags &= ~(1 << signo); +#ifdef _POSIX_SOURCE + siglongjmp(env, -1); +#else + longjmp(env, -1); +#endif +} + + +struct winsize ws; /* window size structure */ + +void +dowinch(signo) + int signo; +{ + sigflags &= ~(1 << signo); + if (ioctl(0, TIOCGWINSZ, (char *) &ws) >= 0) { + if (ws.ws_row > 2) rows = ws.ws_row - 2; + if (ws.ws_col > 8) cols = ws.ws_col - 8; + } +} + + +unsigned char ctab[256]; /* character translation table */ + +/* translit: translate characters in a string */ +char * +translit(s, len, from, to) + char *s; + int len; + int from; + int to; +{ + static int i = 0; + + unsigned char *us; + + ctab[i] = i; /* restore table to initial state */ + ctab[i = from] = to; + for (us = (unsigned char *) s; len-- > 0; us++) + *us = ctab[*us]; + return s; +} + + +line_t line0; /* initial node of line queue */ + +/* init_buf: open scratch buffer; initialize line queue */ +void +init_buf() +{ + int i = 0; + + if (sbopen() < 0) + quit(2); + requeue(&line0, &line0); + for (i = 0; i < 256; i++) + ctab[i] = i; +} + + +/* ckfn: return a legal filename */ +char * +ckfn(s) + char *s; +{ + if (red && (*s == '!' || !strcmp(s, "..") || strchr(s, '/'))) { + sprintf(errmsg, "shell access restricted"); + return NULL; + } + return s; +} diff --git a/bin/ed/ed.h b/bin/ed/ed.h new file mode 100644 index 0000000000..1b277fe935 --- /dev/null +++ b/bin/ed/ed.h @@ -0,0 +1,263 @@ +/* ed.h: type and constant definitions for the ed editor. */ +/* + * Copyright (c) 1993 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Andrew Moore, Talke Studio. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ed.h 5.5 (Berkeley) 3/28/93 + */ + +#include +#include +#if defined(BSD) && BSD >= 199103 || defined(__386BSD__) +# include /* for MAXPATHLEN */ +#endif +#include +#include + +#define BITSPERBYTE 8 +#define BITS(type) (BITSPERBYTE * (int)sizeof(type)) +#define CHARBITS BITS(char) +#define INTBITS BITS(int) +#define INTHIBIT (1 << (INTBITS - 1)) + +#define ERR (-2) +#define EMOD (-3) +#define FATAL (-4) + +#ifndef MAXPATHLEN +# define MAXPATHLEN 255 /* _POSIX_PATH_MAX */ +#endif + +#define MAXFNAME MAXPATHLEN /* max file name size */ +#define MINBUFSZ 512 /* minimum buffer size - must be > 0 */ +#define LINECHARS (INTHIBIT - 1) /* max chars per line */ +#define SE_MAX 30 /* max subexpressions in a regular expression */ + +typedef regex_t pattern_t; + +/* Line node */ +typedef struct line { + struct line *next; + struct line *prev; + off_t seek; /* address of line in scratch buffer */ + +#define ACTV INTHIBIT /* active bit: high bit of len */ + + int len; /* length of line */ +} line_t; + + +typedef struct undo { + +/* type of undo nodes */ +#define UADD 0 +#define UDEL 1 +#define UMOV 2 +#define VMOV 3 + + int type; /* command type */ + line_t *h; /* head of list */ + line_t *t; /* tail of list */ +} undo_t; + +#ifndef max +# define max(a,b) ((a) > (b) ? (a) : (b)) +#endif +#ifndef min +# define min(a,b) ((a) < (b) ? (a) : (b)) +#endif + +/* nextln: return line after l mod k */ +#define nextln(l,k) ((l)+1 > (k) ? 0 : (l)+1) + +/* nextln: return line before l mod k */ +#define prevln(l,k) ((l)-1 < 0 ? (k) : (l)-1) + +#define skipblanks() while (isspace(*ibufp) && *ibufp != '\n') ibufp++ + +/* spl1: disable some interrupts (requires reliable signals) */ +#define spl1() mutex++ + +/* spl0: enable all interrupts; check sigflags (requires reliable signals) */ +#define spl0() \ +if (--mutex == 0) { \ + if (sigflags & (1 << SIGHUP)) dohup(SIGHUP); \ + if (sigflags & (1 << SIGINT)) dointr(SIGINT); \ +} + +#if defined(sun) || defined(NO_REALLOC_NULL) +/* CKBUF: assure at least a minimum size for buffer b */ +#define CKBUF(b,n,i,err) \ +if ((i) > (n)) { \ + int ti = (n); \ + char *ts; \ + spl1(); \ + if ((b) != NULL) { \ + if ((ts = (char *) realloc((b), ti += max((i), MINBUFSZ))) == NULL) { \ + fprintf(stderr, "%s\n", strerror(errno)); \ + sprintf(errmsg, "out of memory"); \ + spl0(); \ + return err; \ + } \ + } else { \ + if ((ts = (char *) malloc(ti += max((i), MINBUFSZ))) == NULL) { \ + fprintf(stderr, "%s\n", strerror(errno)); \ + sprintf(errmsg, "out of memory"); \ + spl0(); \ + return err; \ + } \ + } \ + (n) = ti; \ + (b) = ts; \ + spl0(); \ +} +#else /* NO_REALLOC_NULL */ +/* CKBUF: assure at least a minimum size for buffer b */ +#define CKBUF(b,n,i,err) \ +if ((i) > (n)) { \ + int ti = (n); \ + char *ts; \ + spl1(); \ + if ((ts = (char *) realloc((b), ti += max((i), MINBUFSZ))) == NULL) { \ + fprintf(stderr, "%s\n", strerror(errno)); \ + sprintf(errmsg, "out of memory"); \ + spl0(); \ + return err; \ + } \ + (n) = ti; \ + (b) = ts; \ + spl0(); \ +} +#endif /* NO_REALLOC_NULL */ + +/* requeue: link pred before succ */ +#define requeue(pred, succ) (pred)->next = (succ), (succ)->prev = (pred) + +/* insqueue: insert elem in circular queue after pred */ +#define insqueue(elem, pred) \ +{ \ + requeue((elem), (pred)->next); \ + requeue((pred), elem); \ +} + +/* remqueue: remove elem from circular queue */ +#define remqueue(elem) requeue((elem)->prev, (elem)->next); + +/* nultonl: overwrite ASCII NULs with newlines */ +#define nultonl(s, l) translit(s, l, '\0', '\n') + +/* nltonul: overwrite newlines with ASCII NULs */ +#define nltonul(s, l) translit(s, l, '\n', '\0') + +#ifndef strerror +# define strerror(n) sys_errlist[n] +#endif + +#ifndef __P +# ifndef __STDC__ +# define __P(proto) () +# else +# define __P(proto) proto +# endif +#endif + +/* local function declarations */ +int append __P((long, int)); +int cbcdec __P((char *, FILE *)); +int cbcenc __P((char *, int, FILE *)); +char *ckfn __P((char *)); +int ckglob __P((void)); +int ckrange __P((long, long)); +int desflush __P((FILE *)); +int desgetc __P((FILE *)); +void desinit __P((void)); +int desputc __P((int, FILE *)); +int docmd __P((int)); +void err __P((char *)); +char *ccl __P((char *)); +void cvtkey __P((char *, char *)); +long doglob __P((int)); +void dohup __P((int)); +void dointr __P((int)); +void dowinch __P((int)); +int doprint __P((long, long, int)); +long doread __P((long, char *)); +long dowrite __P((long, long, char *, char *)); +char *esctos __P((char *)); +long patscan __P((pattern_t *, int)); +long getaddr __P((line_t *)); +char *getcmdv __P((int *, int)); +char *getfn __P((void)); +int getkey __P((void)); +char *getlhs __P((int)); +int getline __P((void)); +int getlist __P((void)); +long getnum __P((int)); +long getone __P((void)); +line_t *getlp __P((long)); +int getrhs __P((int)); +int getshcmd __P((void)); +char *gettxt __P((line_t *)); +void init_buf __P((void)); +int join __P((long, long)); +int lndelete __P((long, long)); +line_t *lpdup __P((line_t *)); +void lpqueue __P((line_t *)); +void makekey __P((char *)); +char *makesub __P((int)); +char *translit __P((char *, int, int, int)); +int move __P((long)); +int oddesc __P((char *, char *)); +void onhup __P((int)); +void onintr __P((int)); +pattern_t *optpat __P((void)); +void putstr __P((char *, int, long, int)); +char *puttxt __P((char *)); +void quit __P((int)); +int regsub __P((pattern_t *, line_t *, int)); +int sbclose __P((void)); +int sbopen __P((void)); +int sgetline __P((FILE *)); +int catsub __P((char *, regmatch_t *, int)); +int subst __P((pattern_t *, int)); +int tobinhex __P((int, int)); +int transfer __P((long)); +int undo __P((void)); +undo_t *upush __P((int, long, long)); +void ureset __P((void)); + + +extern char *sys_errlist[]; +extern int mutex; +extern int sigflags; diff --git a/bin/ed/re.c b/bin/ed/re.c new file mode 100644 index 0000000000..462c816626 --- /dev/null +++ b/bin/ed/re.c @@ -0,0 +1,156 @@ +/* re.c: This file contains the regular expression interface routines for + the ed line editor. */ +/*- + * Copyright (c) 1993 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley + * by Andrew Moore, Talke Studio. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)re.c 5.5 (Berkeley) 3/28/93"; +#endif /* not lint */ + +#include +#include +#include + +#include "ed.h" + +extern char *lhbuf; +extern int lhbufsz; +extern char *ibufp; +extern int ibufsz; +extern int patlock; + +char errmsg[MAXFNAME + 40] = ""; + +/* optpat: return pointer to compiled pattern from command buffer */ +pattern_t * +optpat() +{ + static pattern_t *exp = NULL; + + char *exps; + char delim; + int n; + + if ((delim = *ibufp) == '\n') { + if (!exp) sprintf(errmsg, "no previous pattern"); + return exp; + } else if (delim == ' ' || *++ibufp == '\n') { + sprintf(errmsg, "invalid pattern delimiter"); + return NULL; + } else if (*ibufp == delim) { + sprintf(errmsg, "no previous pattern"); + return exp; + } else if ((exps = getlhs(delim)) == NULL) + return NULL; + /* buffer alloc'd && not reserved */ + if (exp && !patlock) + regfree(exp); + else if ((exp = (pattern_t *) malloc(sizeof(pattern_t))) == NULL) { + fprintf(stderr, "%s\n", strerror(errno)); + sprintf(errmsg, "out of memory"); + return NULL; + } + patlock = 0; +#ifdef GNU_REGEX + /* initialize pattern buffer */ + exp->buffer = NULL; + exp->allocated = 0L; + exp->fastmap = 0; /* not used by GNU regex after 0.12 */ + exp->translate = 0; +#endif + if (n = regcomp(exp, exps, 0)) { + regerror(n, exp, errmsg, sizeof errmsg); + return NULL; + } + return exp; +} + + +extern int isbinary; + +/* getlhs: copy a pattern string from the command buffer; return pointer + to the copy */ +char * +getlhs(delim) + int delim; +{ + char *nd; + int len; + + for (nd = ibufp; *nd != delim && *nd != '\n'; nd++) + switch (*nd) { + default: + break; + case '[': + if ((nd = ccl(++nd)) == NULL) { + sprintf(errmsg, "unbalanced brackets ([])"); + return NULL; + } + break; + case '\\': + if (*++nd == '\n') { + sprintf(errmsg, "trailing backslash (\\)"); + return NULL; + } + break; + } + len = nd - ibufp; + CKBUF(lhbuf, lhbufsz, len + 1, NULL); + memcpy(lhbuf, ibufp, len); + lhbuf[len] = '\0'; + ibufp = nd; + return (isbinary) ? nultonl(lhbuf, len) : lhbuf; +} + + +/* ccl: expand a POSIX character class */ +char * +ccl(s) + char *s; +{ + int c, d; + + if (*s == '^') + s++; + if (*s == ']') + s++; + for (; *s != ']' && *s != '\n'; s++) + if (*s == '[' && ((d = *(s+1)) == '.' || d == ':' || d == '=')) + for (s++, c = *++s; *s != ']' || c != d; s++) + if ((c = *s) == '\n') + return NULL; + return (*s == ']') ? s : NULL; +} diff --git a/bin/ed/regex.c b/bin/ed/regex.c new file mode 100644 index 0000000000..8169880d7e --- /dev/null +++ b/bin/ed/regex.c @@ -0,0 +1,4948 @@ +/* Extended regular expression matching and search library, + version 0.12. + (Implements POSIX draft P10003.2/D11.2, except for + internationalization features.) + + Copyright (C) 1993 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* AIX requires this to be the first thing in the file. */ +#if defined (_AIX) && !defined (REGEX_MALLOC) + #pragma alloca +#endif + +#define _GNU_SOURCE + +/* We need this for `regex.h', and perhaps for the Emacs include files. */ +#include + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +/* The `emacs' switch turns on certain matching commands + that make sense only in Emacs. */ +#ifdef emacs + +#include "lisp.h" +#include "buffer.h" +#include "syntax.h" + +/* Emacs uses `NULL' as a predicate. */ +#undef NULL + +#else /* not emacs */ + +/* We used to test for `BSTRING' here, but only GCC and Emacs define + `BSTRING', as far as I know, and neither of them use this code. */ +#if HAVE_STRING_H || STDC_HEADERS +#include +#ifndef bcmp +#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) +#endif +#ifndef bcopy +#define bcopy(s, d, n) memcpy ((d), (s), (n)) +#endif +#ifndef bzero +#define bzero(s, n) memset ((s), 0, (n)) +#endif +#else +#include +#endif + +#ifdef STDC_HEADERS +#include +#else +char *malloc (); +char *realloc (); +#endif + + +/* Define the syntax stuff for \<, \>, etc. */ + +/* This must be nonzero for the wordchar and notwordchar pattern + commands in re_match_2. */ +#ifndef Sword +#define Sword 1 +#endif + +#ifdef SYNTAX_TABLE + +extern char *re_syntax_table; + +#else /* not SYNTAX_TABLE */ + +/* How many characters in the character set. */ +#define CHAR_SET_SIZE 256 + +static char re_syntax_table[CHAR_SET_SIZE]; + +static void +init_syntax_once () +{ + register int c; + static int done = 0; + + if (done) + return; + + bzero (re_syntax_table, sizeof re_syntax_table); + + for (c = 'a'; c <= 'z'; c++) + re_syntax_table[c] = Sword; + + for (c = 'A'; c <= 'Z'; c++) + re_syntax_table[c] = Sword; + + for (c = '0'; c <= '9'; c++) + re_syntax_table[c] = Sword; + + re_syntax_table['_'] = Sword; + + done = 1; +} + +#endif /* not SYNTAX_TABLE */ + +#define SYNTAX(c) re_syntax_table[c] + +#endif /* not emacs */ + +/* Get the interface, including the syntax bits. */ +#include "regex.h" + +/* isalpha etc. are used for the character classes. */ +#include + +#ifndef isascii +#define isascii(c) 1 +#endif + +#ifdef isblank +#define ISBLANK(c) (isascii (c) && isblank (c)) +#else +#define ISBLANK(c) ((c) == ' ' || (c) == '\t') +#endif +#ifdef isgraph +#define ISGRAPH(c) (isascii (c) && isgraph (c)) +#else +#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c)) +#endif + +#define ISPRINT(c) (isascii (c) && isprint (c)) +#define ISDIGIT(c) (isascii (c) && isdigit (c)) +#define ISALNUM(c) (isascii (c) && isalnum (c)) +#define ISALPHA(c) (isascii (c) && isalpha (c)) +#define ISCNTRL(c) (isascii (c) && iscntrl (c)) +#define ISLOWER(c) (isascii (c) && islower (c)) +#define ISPUNCT(c) (isascii (c) && ispunct (c)) +#define ISSPACE(c) (isascii (c) && isspace (c)) +#define ISUPPER(c) (isascii (c) && isupper (c)) +#define ISXDIGIT(c) (isascii (c) && isxdigit (c)) + +#ifndef NULL +#define NULL 0 +#endif + +/* We remove any previous definition of `SIGN_EXTEND_CHAR', + since ours (we hope) works properly with all combinations of + machines, compilers, `char' and `unsigned char' argument types. + (Per Bothner suggested the basic approach.) */ +#undef SIGN_EXTEND_CHAR +#if __STDC__ +#define SIGN_EXTEND_CHAR(c) ((signed char) (c)) +#else /* not __STDC__ */ +/* As in Harbison and Steele. */ +#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) +#endif + +/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we + use `alloca' instead of `malloc'. This is because using malloc in + re_search* or re_match* could cause memory leaks when C-g is used in + Emacs; also, malloc is slower and causes storage fragmentation. On + the other hand, malloc is more portable, and easier to debug. + + Because we sometimes use alloca, some routines have to be macros, + not functions -- `alloca'-allocated space disappears at the end of the + function it is called in. */ + +#ifdef REGEX_MALLOC + +#define REGEX_ALLOCATE malloc +#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) + +#else /* not REGEX_MALLOC */ + +/* Emacs already defines alloca, sometimes. */ +#ifndef alloca + +/* Make alloca work the best possible way. */ +#ifdef __GNUC__ +#define alloca __builtin_alloca +#else /* not __GNUC__ */ +#if HAVE_ALLOCA_H +#include +#else /* not __GNUC__ or HAVE_ALLOCA_H */ +#ifndef _AIX /* Already did AIX, up at the top. */ +char *alloca (); +#endif /* not _AIX */ +#endif /* not HAVE_ALLOCA_H */ +#endif /* not __GNUC__ */ + +#endif /* not alloca */ + +#define REGEX_ALLOCATE alloca + +/* Assumes a `char *destination' variable. */ +#define REGEX_REALLOCATE(source, osize, nsize) \ + (destination = (char *) alloca (nsize), \ + bcopy (source, destination, osize), \ + destination) + +#endif /* not REGEX_MALLOC */ + + +/* True if `size1' is non-NULL and PTR is pointing anywhere inside + `string1' or just past its end. This works if PTR is NULL, which is + a good thing. */ +#define FIRST_STRING_P(ptr) \ + (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) + +/* (Re)Allocate N items of type T using malloc, or fail. */ +#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) +#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) +#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) + +#define BYTEWIDTH 8 /* In bits. */ + +#define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +typedef char boolean; +#define false 0 +#define true 1 + +/* These are the command codes that appear in compiled regular + expressions. Some opcodes are followed by argument bytes. A + command code can specify any interpretation whatsoever for its + arguments. Zero bytes may appear in the compiled regular expression. + + The value of `exactn' is needed in search.c (search_buffer) in Emacs. + So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of + `exactn' we use here must also be 1. */ + +typedef enum +{ + no_op = 0, + + /* Followed by one byte giving n, then by n literal bytes. */ + exactn = 1, + + /* Matches any (more or less) character. */ + anychar, + + /* Matches any one char belonging to specified set. First + following byte is number of bitmap bytes. Then come bytes + for a bitmap saying which chars are in. Bits in each byte + are ordered low-bit-first. A character is in the set if its + bit is 1. A character too large to have a bit in the map is + automatically not in the set. */ + charset, + + /* Same parameters as charset, but match any character that is + not one of those specified. */ + charset_not, + + /* Start remembering the text that is matched, for storing in a + register. Followed by one byte with the register number, in + the range 0 to one less than the pattern buffer's re_nsub + field. Then followed by one byte with the number of groups + inner to this one. (This last has to be part of the + start_memory only because we need it in the on_failure_jump + of re_match_2.) */ + start_memory, + + /* Stop remembering the text that is matched and store it in a + memory register. Followed by one byte with the register + number, in the range 0 to one less than `re_nsub' in the + pattern buffer, and one byte with the number of inner groups, + just like `start_memory'. (We need the number of inner + groups here because we don't have any easy way of finding the + corresponding start_memory when we're at a stop_memory.) */ + stop_memory, + + /* Match a duplicate of something remembered. Followed by one + byte containing the register number. */ + duplicate, + + /* Fail unless at beginning of line. */ + begline, + + /* Fail unless at end of line. */ + endline, + + /* Succeeds if at beginning of buffer (if emacs) or at beginning + of string to be matched (if not). */ + begbuf, + + /* Analogously, for end of buffer/string. */ + endbuf, + + /* Followed by two byte relative address to which to jump. */ + jump, + + /* Same as jump, but marks the end of an alternative. */ + jump_past_alt, + + /* Followed by two-byte relative address of place to resume at + in case of failure. */ + on_failure_jump, + + /* Like on_failure_jump, but pushes a placeholder instead of the + current string position when executed. */ + on_failure_keep_string_jump, + + /* Throw away latest failure point and then jump to following + two-byte relative address. */ + pop_failure_jump, + + /* Change to pop_failure_jump if know won't have to backtrack to + match; otherwise change to jump. This is used to jump + back to the beginning of a repeat. If what follows this jump + clearly won't match what the repeat does, such that we can be + sure that there is no use backtracking out of repetitions + already matched, then we change it to a pop_failure_jump. + Followed by two-byte address. */ + maybe_pop_jump, + + /* Jump to following two-byte address, and push a dummy failure + point. This failure point will be thrown away if an attempt + is made to use it for a failure. A `+' construct makes this + before the first repeat. Also used as an intermediary kind + of jump when compiling an alternative. */ + dummy_failure_jump, + + /* Push a dummy failure point and continue. Used at the end of + alternatives. */ + push_dummy_failure, + + /* Followed by two-byte relative address and two-byte number n. + After matching N times, jump to the address upon failure. */ + succeed_n, + + /* Followed by two-byte relative address, and two-byte number n. + Jump to the address N times, then fail. */ + jump_n, + + /* Set the following two-byte relative address to the + subsequent two-byte number. The address *includes* the two + bytes of number. */ + set_number_at, + + wordchar, /* Matches any word-constituent character. */ + notwordchar, /* Matches any char that is not a word-constituent. */ + + wordbeg, /* Succeeds if at word beginning. */ + wordend, /* Succeeds if at word end. */ + + wordbound, /* Succeeds if at a word boundary. */ + notwordbound /* Succeeds if not at a word boundary. */ + +#ifdef emacs + ,before_dot, /* Succeeds if before point. */ + at_dot, /* Succeeds if at point. */ + after_dot, /* Succeeds if after point. */ + + /* Matches any character whose syntax is specified. Followed by + a byte which contains a syntax code, e.g., Sword. */ + syntaxspec, + + /* Matches any character whose syntax is not that specified. */ + notsyntaxspec +#endif /* emacs */ +} re_opcode_t; + +/* Common operations on the compiled pattern. */ + +/* Store NUMBER in two contiguous bytes starting at DESTINATION. */ + +#define STORE_NUMBER(destination, number) \ + do { \ + (destination)[0] = (number) & 0377; \ + (destination)[1] = (number) >> 8; \ + } while (0) + +/* Same as STORE_NUMBER, except increment DESTINATION to + the byte after where the number is stored. Therefore, DESTINATION + must be an lvalue. */ + +#define STORE_NUMBER_AND_INCR(destination, number) \ + do { \ + STORE_NUMBER (destination, number); \ + (destination) += 2; \ + } while (0) + +/* Put into DESTINATION a number stored in two contiguous bytes starting + at SOURCE. */ + +#define EXTRACT_NUMBER(destination, source) \ + do { \ + (destination) = *(source) & 0377; \ + (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ + } while (0) + +#ifdef DEBUG +static void +extract_number (dest, source) + int *dest; + unsigned char *source; +{ + int temp = SIGN_EXTEND_CHAR (*(source + 1)); + *dest = *source & 0377; + *dest += temp << 8; +} + +#ifndef EXTRACT_MACROS /* To debug the macros. */ +#undef EXTRACT_NUMBER +#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) +#endif /* not EXTRACT_MACROS */ + +#endif /* DEBUG */ + +/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. + SOURCE must be an lvalue. */ + +#define EXTRACT_NUMBER_AND_INCR(destination, source) \ + do { \ + EXTRACT_NUMBER (destination, source); \ + (source) += 2; \ + } while (0) + +#ifdef DEBUG +static void +extract_number_and_incr (destination, source) + int *destination; + unsigned char **source; +{ + extract_number (destination, *source); + *source += 2; +} + +#ifndef EXTRACT_MACROS +#undef EXTRACT_NUMBER_AND_INCR +#define EXTRACT_NUMBER_AND_INCR(dest, src) \ + extract_number_and_incr (&dest, &src) +#endif /* not EXTRACT_MACROS */ + +#endif /* DEBUG */ + +/* If DEBUG is defined, Regex prints many voluminous messages about what + it is doing (if the variable `debug' is nonzero). If linked with the + main program in `iregex.c', you can enter patterns and strings + interactively. And if linked with the main program in `main.c' and + the other test files, you can run the already-written tests. */ + +#ifdef DEBUG + +/* We use standard I/O for debugging. */ +#include + +/* It is useful to test things that ``must'' be true when debugging. */ +#include + +static int debug = 0; + +#define DEBUG_STATEMENT(e) e +#define DEBUG_PRINT1(x) if (debug) printf (x) +#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) +#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) +#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) +#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ + if (debug) print_partial_compiled_pattern (s, e) +#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ + if (debug) print_double_string (w, s1, sz1, s2, sz2) + + +extern void printchar (); + +/* Print the fastmap in human-readable form. */ + +void +print_fastmap (fastmap) + char *fastmap; +{ + unsigned was_a_range = 0; + unsigned i = 0; + + while (i < (1 << BYTEWIDTH)) + { + if (fastmap[i++]) + { + was_a_range = 0; + printchar (i - 1); + while (i < (1 << BYTEWIDTH) && fastmap[i]) + { + was_a_range = 1; + i++; + } + if (was_a_range) + { + printf ("-"); + printchar (i - 1); + } + } + } + putchar ('\n'); +} + + +/* Print a compiled pattern string in human-readable form, starting at + the START pointer into it and ending just before the pointer END. */ + +void +print_partial_compiled_pattern (start, end) + unsigned char *start; + unsigned char *end; +{ + int mcnt, mcnt2; + unsigned char *p = start; + unsigned char *pend = end; + + if (start == NULL) + { + printf ("(null)\n"); + return; + } + + /* Loop over pattern commands. */ + while (p < pend) + { + switch ((re_opcode_t) *p++) + { + case no_op: + printf ("/no_op"); + break; + + case exactn: + mcnt = *p++; + printf ("/exactn/%d", mcnt); + do + { + putchar ('/'); + printchar (*p++); + } + while (--mcnt); + break; + + case start_memory: + mcnt = *p++; + printf ("/start_memory/%d/%d", mcnt, *p++); + break; + + case stop_memory: + mcnt = *p++; + printf ("/stop_memory/%d/%d", mcnt, *p++); + break; + + case duplicate: + printf ("/duplicate/%d", *p++); + break; + + case anychar: + printf ("/anychar"); + break; + + case charset: + case charset_not: + { + register int c; + + printf ("/charset%s", + (re_opcode_t) *(p - 1) == charset_not ? "_not" : ""); + + assert (p + *p < pend); + + for (c = 0; c < *p; c++) + { + unsigned bit; + unsigned char map_byte = p[1 + c]; + + putchar ('/'); + + for (bit = 0; bit < BYTEWIDTH; bit++) + if (map_byte & (1 << bit)) + printchar (c * BYTEWIDTH + bit); + } + p += 1 + *p; + break; + } + + case begline: + printf ("/begline"); + break; + + case endline: + printf ("/endline"); + break; + + case on_failure_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/on_failure_jump/0/%d", mcnt); + break; + + case on_failure_keep_string_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/on_failure_keep_string_jump/0/%d", mcnt); + break; + + case dummy_failure_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/dummy_failure_jump/0/%d", mcnt); + break; + + case push_dummy_failure: + printf ("/push_dummy_failure"); + break; + + case maybe_pop_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/maybe_pop_jump/0/%d", mcnt); + break; + + case pop_failure_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/pop_failure_jump/0/%d", mcnt); + break; + + case jump_past_alt: + extract_number_and_incr (&mcnt, &p); + printf ("/jump_past_alt/0/%d", mcnt); + break; + + case jump: + extract_number_and_incr (&mcnt, &p); + printf ("/jump/0/%d", mcnt); + break; + + case succeed_n: + extract_number_and_incr (&mcnt, &p); + extract_number_and_incr (&mcnt2, &p); + printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2); + break; + + case jump_n: + extract_number_and_incr (&mcnt, &p); + extract_number_and_incr (&mcnt2, &p); + printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2); + break; + + case set_number_at: + extract_number_and_incr (&mcnt, &p); + extract_number_and_incr (&mcnt2, &p); + printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2); + break; + + case wordbound: + printf ("/wordbound"); + break; + + case notwordbound: + printf ("/notwordbound"); + break; + + case wordbeg: + printf ("/wordbeg"); + break; + + case wordend: + printf ("/wordend"); + +#ifdef emacs + case before_dot: + printf ("/before_dot"); + break; + + case at_dot: + printf ("/at_dot"); + break; + + case after_dot: + printf ("/after_dot"); + break; + + case syntaxspec: + printf ("/syntaxspec"); + mcnt = *p++; + printf ("/%d", mcnt); + break; + + case notsyntaxspec: + printf ("/notsyntaxspec"); + mcnt = *p++; + printf ("/%d", mcnt); + break; +#endif /* emacs */ + + case wordchar: + printf ("/wordchar"); + break; + + case notwordchar: + printf ("/notwordchar"); + break; + + case begbuf: + printf ("/begbuf"); + break; + + case endbuf: + printf ("/endbuf"); + break; + + default: + printf ("?%d", *(p-1)); + } + } + printf ("/\n"); +} + + +void +print_compiled_pattern (bufp) + struct re_pattern_buffer *bufp; +{ + unsigned char *buffer = bufp->buffer; + + print_partial_compiled_pattern (buffer, buffer + bufp->used); + printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated); + + if (bufp->fastmap_accurate && bufp->fastmap) + { + printf ("fastmap: "); + print_fastmap (bufp->fastmap); + } + + printf ("re_nsub: %d\t", bufp->re_nsub); + printf ("regs_alloc: %d\t", bufp->regs_allocated); + printf ("can_be_null: %d\t", bufp->can_be_null); + printf ("newline_anchor: %d\n", bufp->newline_anchor); + printf ("no_sub: %d\t", bufp->no_sub); + printf ("not_bol: %d\t", bufp->not_bol); + printf ("not_eol: %d\t", bufp->not_eol); + printf ("syntax: %d\n", bufp->syntax); + /* Perhaps we should print the translate table? */ +} + + +void +print_double_string (where, string1, size1, string2, size2) + const char *where; + const char *string1; + const char *string2; + int size1; + int size2; +{ + unsigned this_char; + + if (where == NULL) + printf ("(null)"); + else + { + if (FIRST_STRING_P (where)) + { + for (this_char = where - string1; this_char < size1; this_char++) + printchar (string1[this_char]); + + where = string2; + } + + for (this_char = where - string2; this_char < size2; this_char++) + printchar (string2[this_char]); + } +} + +#else /* not DEBUG */ + +#undef assert +#define assert(e) + +#define DEBUG_STATEMENT(e) +#define DEBUG_PRINT1(x) +#define DEBUG_PRINT2(x1, x2) +#define DEBUG_PRINT3(x1, x2, x3) +#define DEBUG_PRINT4(x1, x2, x3, x4) +#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) +#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) + +#endif /* not DEBUG */ + +/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can + also be assigned to arbitrarily: each pattern buffer stores its own + syntax, so it can be changed between regex compilations. */ +reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS; + + +/* Specify the precise syntax of regexps for compilation. This provides + for compatibility for various utilities which historically have + different, incompatible syntaxes. + + The argument SYNTAX is a bit mask comprised of the various bits + defined in regex.h. We return the old syntax. */ + +reg_syntax_t +re_set_syntax (syntax) + reg_syntax_t syntax; +{ + reg_syntax_t ret = re_syntax_options; + + re_syntax_options = syntax; + return ret; +} + +/* This table gives an error message for each of the error codes listed + in regex.h. Obviously the order here has to be same as there. */ + +static const char *re_error_msg[] = + { NULL, /* REG_NOERROR */ + "No match", /* REG_NOMATCH */ + "Invalid regular expression", /* REG_BADPAT */ + "Invalid collation character", /* REG_ECOLLATE */ + "Invalid character class name", /* REG_ECTYPE */ + "Trailing backslash", /* REG_EESCAPE */ + "Invalid back reference", /* REG_ESUBREG */ + "Unmatched [ or [^", /* REG_EBRACK */ + "Unmatched ( or \\(", /* REG_EPAREN */ + "Unmatched \\{", /* REG_EBRACE */ + "Invalid content of \\{\\}", /* REG_BADBR */ + "Invalid range end", /* REG_ERANGE */ + "Memory exhausted", /* REG_ESPACE */ + "Invalid preceding regular expression", /* REG_BADRPT */ + "Premature end of regular expression", /* REG_EEND */ + "Regular expression too big", /* REG_ESIZE */ + "Unmatched ) or \\)", /* REG_ERPAREN */ + }; + +/* Subroutine declarations and macros for regex_compile. */ + +static void store_op1 (), store_op2 (); +static void insert_op1 (), insert_op2 (); +static boolean at_begline_loc_p (), at_endline_loc_p (); +static boolean group_in_compile_stack (); +static reg_errcode_t compile_range (); + +/* Fetch the next character in the uncompiled pattern---translating it + if necessary. Also cast from a signed character in the constant + string passed to us by the user to an unsigned char that we can use + as an array index (in, e.g., `translate'). */ +#define PATFETCH(c) \ + do {if (p == pend) return REG_EEND; \ + c = (unsigned char) *p++; \ + if (translate) c = translate[c]; \ + } while (0) + +/* Fetch the next character in the uncompiled pattern, with no + translation. */ +#define PATFETCH_RAW(c) \ + do {if (p == pend) return REG_EEND; \ + c = (unsigned char) *p++; \ + } while (0) + +/* Go backwards one character in the pattern. */ +#define PATUNFETCH p-- + + +/* If `translate' is non-null, return translate[D], else just D. We + cast the subscript to translate because some data is declared as + `char *', to avoid warnings when a string constant is passed. But + when we use a character as a subscript we must make it unsigned. */ +#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d)) + + +/* Macros for outputting the compiled pattern into `buffer'. */ + +/* If the buffer isn't allocated when it comes in, use this. */ +#define INIT_BUF_SIZE 32 + +/* Make sure we have at least N more bytes of space in buffer. */ +#define GET_BUFFER_SPACE(n) \ + while (b - bufp->buffer + (n) > bufp->allocated) \ + EXTEND_BUFFER () + +/* Make sure we have one more byte of buffer space and then add C to it. */ +#define BUF_PUSH(c) \ + do { \ + GET_BUFFER_SPACE (1); \ + *b++ = (unsigned char) (c); \ + } while (0) + + +/* Ensure we have two more bytes of buffer space and then append C1 and C2. */ +#define BUF_PUSH_2(c1, c2) \ + do { \ + GET_BUFFER_SPACE (2); \ + *b++ = (unsigned char) (c1); \ + *b++ = (unsigned char) (c2); \ + } while (0) + + +/* As with BUF_PUSH_2, except for three bytes. */ +#define BUF_PUSH_3(c1, c2, c3) \ + do { \ + GET_BUFFER_SPACE (3); \ + *b++ = (unsigned char) (c1); \ + *b++ = (unsigned char) (c2); \ + *b++ = (unsigned char) (c3); \ + } while (0) + + +/* Store a jump with opcode OP at LOC to location TO. We store a + relative address offset by the three bytes the jump itself occupies. */ +#define STORE_JUMP(op, loc, to) \ + store_op1 (op, loc, (to) - (loc) - 3) + +/* Likewise, for a two-argument jump. */ +#define STORE_JUMP2(op, loc, to, arg) \ + store_op2 (op, loc, (to) - (loc) - 3, arg) + +/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ +#define INSERT_JUMP(op, loc, to) \ + insert_op1 (op, loc, (to) - (loc) - 3, b) + +/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ +#define INSERT_JUMP2(op, loc, to, arg) \ + insert_op2 (op, loc, (to) - (loc) - 3, arg, b) + + +/* This is not an arbitrary limit: the arguments which represent offsets + into the pattern are two bytes long. So if 2^16 bytes turns out to + be too small, many things would have to change. */ +#define MAX_BUF_SIZE (1L << 16) + + +/* Extend the buffer by twice its current size via realloc and + reset the pointers that pointed into the old block to point to the + correct places in the new one. If extending the buffer results in it + being larger than MAX_BUF_SIZE, then flag memory exhausted. */ +#define EXTEND_BUFFER() \ + do { \ + unsigned char *old_buffer = bufp->buffer; \ + if (bufp->allocated == MAX_BUF_SIZE) \ + return REG_ESIZE; \ + bufp->allocated <<= 1; \ + if (bufp->allocated > MAX_BUF_SIZE) \ + bufp->allocated = MAX_BUF_SIZE; \ + bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\ + if (bufp->buffer == NULL) \ + return REG_ESPACE; \ + /* If the buffer moved, move all the pointers into it. */ \ + if (old_buffer != bufp->buffer) \ + { \ + b = (b - old_buffer) + bufp->buffer; \ + begalt = (begalt - old_buffer) + bufp->buffer; \ + if (fixup_alt_jump) \ + fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ + if (laststart) \ + laststart = (laststart - old_buffer) + bufp->buffer; \ + if (pending_exact) \ + pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ + } \ + } while (0) + + +/* Since we have one byte reserved for the register number argument to + {start,stop}_memory, the maximum number of groups we can report + things about is what fits in that byte. */ +#define MAX_REGNUM 255 + +/* But patterns can have more than `MAX_REGNUM' registers. We just + ignore the excess. */ +typedef unsigned regnum_t; + + +/* Macros for the compile stack. */ + +/* Since offsets can go either forwards or backwards, this type needs to + be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ +typedef int pattern_offset_t; + +typedef struct +{ + pattern_offset_t begalt_offset; + pattern_offset_t fixup_alt_jump; + pattern_offset_t inner_group_offset; + pattern_offset_t laststart_offset; + regnum_t regnum; +} compile_stack_elt_t; + + +typedef struct +{ + compile_stack_elt_t *stack; + unsigned size; + unsigned avail; /* Offset of next open position. */ +} compile_stack_type; + + +#define INIT_COMPILE_STACK_SIZE 32 + +#define COMPILE_STACK_EMPTY (compile_stack.avail == 0) +#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) + +/* The next available element. */ +#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) + + +/* Set the bit for character C in a list. */ +#define SET_LIST_BIT(c) \ + (b[((unsigned char) (c)) / BYTEWIDTH] \ + |= 1 << (((unsigned char) c) % BYTEWIDTH)) + + +/* Get the next unsigned number in the uncompiled pattern. */ +#define GET_UNSIGNED_NUMBER(num) \ + { if (p != pend) \ + { \ + PATFETCH (c); \ + while (ISDIGIT (c)) \ + { \ + if (num < 0) \ + num = 0; \ + num = num * 10 + c - '0'; \ + if (p == pend) \ + break; \ + PATFETCH (c); \ + } \ + } \ + } + +#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ + +#define IS_CHAR_CLASS(string) \ + (STREQ (string, "alpha") || STREQ (string, "upper") \ + || STREQ (string, "lower") || STREQ (string, "digit") \ + || STREQ (string, "alnum") || STREQ (string, "xdigit") \ + || STREQ (string, "space") || STREQ (string, "print") \ + || STREQ (string, "punct") || STREQ (string, "graph") \ + || STREQ (string, "cntrl") || STREQ (string, "blank")) + +/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. + Returns one of error codes defined in `regex.h', or zero for success. + + Assumes the `allocated' (and perhaps `buffer') and `translate' + fields are set in BUFP on entry. + + If it succeeds, results are put in BUFP (if it returns an error, the + contents of BUFP are undefined): + `buffer' is the compiled pattern; + `syntax' is set to SYNTAX; + `used' is set to the length of the compiled pattern; + `fastmap_accurate' is zero; + `re_nsub' is the number of subexpressions in PATTERN; + `not_bol' and `not_eol' are zero; + + The `fastmap' and `newline_anchor' fields are neither + examined nor set. */ + +static reg_errcode_t +regex_compile (pattern, size, syntax, bufp) + const char *pattern; + int size; + reg_syntax_t syntax; + struct re_pattern_buffer *bufp; +{ + /* We fetch characters from PATTERN here. Even though PATTERN is + `char *' (i.e., signed), we declare these variables as unsigned, so + they can be reliably used as array indices. */ + register unsigned char c, c1; + + /* A random tempory spot in PATTERN. */ + const char *p1; + + /* Points to the end of the buffer, where we should append. */ + register unsigned char *b; + + /* Keeps track of unclosed groups. */ + compile_stack_type compile_stack; + + /* Points to the current (ending) position in the pattern. */ + const char *p = pattern; + const char *pend = pattern + size; + + /* How to translate the characters in the pattern. */ + char *translate = bufp->translate; + + /* Address of the count-byte of the most recently inserted `exactn' + command. This makes it possible to tell if a new exact-match + character can be added to that command or if the character requires + a new `exactn' command. */ + unsigned char *pending_exact = 0; + + /* Address of start of the most recently finished expression. + This tells, e.g., postfix * where to find the start of its + operand. Reset at the beginning of groups and alternatives. */ + unsigned char *laststart = 0; + + /* Address of beginning of regexp, or inside of last group. */ + unsigned char *begalt; + + /* Place in the uncompiled pattern (i.e., the {) to + which to go back if the interval is invalid. */ + const char *beg_interval; + + /* Address of the place where a forward jump should go to the end of + the containing expression. Each alternative of an `or' -- except the + last -- ends with a forward jump of this sort. */ + unsigned char *fixup_alt_jump = 0; + + /* Counts open-groups as they are encountered. Remembered for the + matching close-group on the compile stack, so the same register + number is put in the stop_memory as the start_memory. */ + regnum_t regnum = 0; + +#ifdef DEBUG + DEBUG_PRINT1 ("\nCompiling pattern: "); + if (debug) + { + unsigned debug_count; + + for (debug_count = 0; debug_count < size; debug_count++) + printchar (pattern[debug_count]); + putchar ('\n'); + } +#endif /* DEBUG */ + + /* Initialize the compile stack. */ + compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); + if (compile_stack.stack == NULL) + return REG_ESPACE; + + compile_stack.size = INIT_COMPILE_STACK_SIZE; + compile_stack.avail = 0; + + /* Initialize the pattern buffer. */ + bufp->syntax = syntax; + bufp->fastmap_accurate = 0; + bufp->not_bol = bufp->not_eol = 0; + + /* Set `used' to zero, so that if we return an error, the pattern + printer (for debugging) will think there's no pattern. We reset it + at the end. */ + bufp->used = 0; + + /* Always count groups, whether or not bufp->no_sub is set. */ + bufp->re_nsub = 0; + +#if !defined (emacs) && !defined (SYNTAX_TABLE) + /* Initialize the syntax table. */ + init_syntax_once (); +#endif + + if (bufp->allocated == 0) + { + if (bufp->buffer) + { /* If zero allocated, but buffer is non-null, try to realloc + enough space. This loses if buffer's address is bogus, but + that is the user's responsibility. */ + RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); + } + else + { /* Caller did not allocate a buffer. Do it for them. */ + bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); + } + if (!bufp->buffer) return REG_ESPACE; + + bufp->allocated = INIT_BUF_SIZE; + } + + begalt = b = bufp->buffer; + + /* Loop through the uncompiled pattern until we're at the end. */ + while (p != pend) + { + PATFETCH (c); + + switch (c) + { + case '^': + { + if ( /* If at start of pattern, it's an operator. */ + p == pattern + 1 + /* If context independent, it's an operator. */ + || syntax & RE_CONTEXT_INDEP_ANCHORS + /* Otherwise, depends on what's come before. */ + || at_begline_loc_p (pattern, p, syntax)) + BUF_PUSH (begline); + else + goto normal_char; + } + break; + + + case '$': + { + if ( /* If at end of pattern, it's an operator. */ + p == pend + /* If context independent, it's an operator. */ + || syntax & RE_CONTEXT_INDEP_ANCHORS + /* Otherwise, depends on what's next. */ + || at_endline_loc_p (p, pend, syntax)) + BUF_PUSH (endline); + else + goto normal_char; + } + break; + + + case '+': + case '?': + if ((syntax & RE_BK_PLUS_QM) + || (syntax & RE_LIMITED_OPS)) + goto normal_char; + handle_plus: + case '*': + /* If there is no previous pattern... */ + if (!laststart) + { + if (syntax & RE_CONTEXT_INVALID_OPS) + return REG_BADRPT; + else if (!(syntax & RE_CONTEXT_INDEP_OPS)) + goto normal_char; + } + + { + /* Are we optimizing this jump? */ + boolean keep_string_p = false; + + /* 1 means zero (many) matches is allowed. */ + char zero_times_ok = 0, many_times_ok = 0; + + /* If there is a sequence of repetition chars, collapse it + down to just one (the right one). We can't combine + interval operators with these because of, e.g., `a{2}*', + which should only match an even number of `a's. */ + + for (;;) + { + zero_times_ok |= c != '+'; + many_times_ok |= c != '?'; + + if (p == pend) + break; + + PATFETCH (c); + + if (c == '*' + || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) + ; + + else if (syntax & RE_BK_PLUS_QM && c == '\\') + { + if (p == pend) return REG_EESCAPE; + + PATFETCH (c1); + if (!(c1 == '+' || c1 == '?')) + { + PATUNFETCH; + PATUNFETCH; + break; + } + + c = c1; + } + else + { + PATUNFETCH; + break; + } + + /* If we get here, we found another repeat character. */ + } + + /* Star, etc. applied to an empty pattern is equivalent + to an empty pattern. */ + if (!laststart) + break; + + /* Now we know whether or not zero matches is allowed + and also whether or not two or more matches is allowed. */ + if (many_times_ok) + { /* More than one repetition is allowed, so put in at the + end a backward relative jump from `b' to before the next + jump we're going to put in below (which jumps from + laststart to after this jump). + + But if we are at the `*' in the exact sequence `.*\n', + insert an unconditional jump backwards to the ., + instead of the beginning of the loop. This way we only + push a failure point once, instead of every time + through the loop. */ + assert (p - 1 > pattern); + + /* Allocate the space for the jump. */ + GET_BUFFER_SPACE (3); + + /* We know we are not at the first character of the pattern, + because laststart was nonzero. And we've already + incremented `p', by the way, to be the character after + the `*'. Do we have to do something analogous here + for null bytes, because of RE_DOT_NOT_NULL? */ + if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') + && zero_times_ok + && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') + && !(syntax & RE_DOT_NEWLINE)) + { /* We have .*\n. */ + STORE_JUMP (jump, b, laststart); + keep_string_p = true; + } + else + /* Anything else. */ + STORE_JUMP (maybe_pop_jump, b, laststart - 3); + + /* We've added more stuff to the buffer. */ + b += 3; + } + + /* On failure, jump from laststart to b + 3, which will be the + end of the buffer after this jump is inserted. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump + : on_failure_jump, + laststart, b + 3); + pending_exact = 0; + b += 3; + + if (!zero_times_ok) + { + /* At least one repetition is required, so insert a + `dummy_failure_jump' before the initial + `on_failure_jump' instruction of the loop. This + effects a skip over that instruction the first time + we hit that loop. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); + b += 3; + } + } + break; + + + case '.': + laststart = b; + BUF_PUSH (anychar); + break; + + + case '[': + { + boolean had_char_class = false; + + if (p == pend) return REG_EBRACK; + + /* Ensure that we have enough space to push a charset: the + opcode, the length count, and the bitset; 34 bytes in all. */ + GET_BUFFER_SPACE (34); + + laststart = b; + + /* We test `*p == '^' twice, instead of using an if + statement, so we only need one BUF_PUSH. */ + BUF_PUSH (*p == '^' ? charset_not : charset); + if (*p == '^') + p++; + + /* Remember the first position in the bracket expression. */ + p1 = p; + + /* Push the number of bytes in the bitmap. */ + BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); + + /* Clear the whole map. */ + bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); + + /* charset_not matches newline according to a syntax bit. */ + if ((re_opcode_t) b[-2] == charset_not + && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) + SET_LIST_BIT ('\n'); + + /* Read in characters and ranges, setting map bits. */ + for (;;) + { + if (p == pend) return REG_EBRACK; + + PATFETCH (c); + + /* \ might escape characters inside [...] and [^...]. */ + if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') + { + if (p == pend) return REG_EESCAPE; + + PATFETCH (c1); + SET_LIST_BIT (c1); + continue; + } + + /* Could be the end of the bracket expression. If it's + not (i.e., when the bracket expression is `[]' so + far), the ']' character bit gets set way below. */ + if (c == ']' && p != p1 + 1) + break; + + /* Look ahead to see if it's a range when the last thing + was a character class. */ + if (had_char_class && c == '-' && *p != ']') + return REG_ERANGE; + + /* Look ahead to see if it's a range when the last thing + was a character: if this is a hyphen not at the + beginning or the end of a list, then it's the range + operator. */ + if (c == '-' + && !(p - 2 >= pattern && p[-2] == '[') + && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') + && *p != ']') + { + reg_errcode_t ret + = compile_range (&p, pend, translate, syntax, b); + if (ret != REG_NOERROR) return ret; + } + + else if (p[0] == '-' && p[1] != ']') + { /* This handles ranges made up of characters only. */ + reg_errcode_t ret; + + /* Move past the `-'. */ + PATFETCH (c1); + + ret = compile_range (&p, pend, translate, syntax, b); + if (ret != REG_NOERROR) return ret; + } + + /* See if we're at the beginning of a possible character + class. */ + + else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') + { /* Leave room for the null. */ + char str[CHAR_CLASS_MAX_LENGTH + 1]; + + PATFETCH (c); + c1 = 0; + + /* If pattern is `[[:'. */ + if (p == pend) return REG_EBRACK; + + for (;;) + { + PATFETCH (c); + if (c == ':' || c == ']' || p == pend + || c1 == CHAR_CLASS_MAX_LENGTH) + break; + str[c1++] = c; + } + str[c1] = '\0'; + + /* If isn't a word bracketed by `[:' and:`]': + undo the ending character, the letters, and leave + the leading `:' and `[' (but set bits for them). */ + if (c == ':' && *p == ']') + { + int ch; + boolean is_alnum = STREQ (str, "alnum"); + boolean is_alpha = STREQ (str, "alpha"); + boolean is_blank = STREQ (str, "blank"); + boolean is_cntrl = STREQ (str, "cntrl"); + boolean is_digit = STREQ (str, "digit"); + boolean is_graph = STREQ (str, "graph"); + boolean is_lower = STREQ (str, "lower"); + boolean is_print = STREQ (str, "print"); + boolean is_punct = STREQ (str, "punct"); + boolean is_space = STREQ (str, "space"); + boolean is_upper = STREQ (str, "upper"); + boolean is_xdigit = STREQ (str, "xdigit"); + + if (!IS_CHAR_CLASS (str)) return REG_ECTYPE; + + /* Throw away the ] at the end of the character + class. */ + PATFETCH (c); + + if (p == pend) return REG_EBRACK; + + for (ch = 0; ch < 1 << BYTEWIDTH; ch++) + { + if ( (is_alnum && ISALNUM (ch)) + || (is_alpha && ISALPHA (ch)) + || (is_blank && ISBLANK (ch)) + || (is_cntrl && ISCNTRL (ch)) + || (is_digit && ISDIGIT (ch)) + || (is_graph && ISGRAPH (ch)) + || (is_lower && ISLOWER (ch)) + || (is_print && ISPRINT (ch)) + || (is_punct && ISPUNCT (ch)) + || (is_space && ISSPACE (ch)) + || (is_upper && ISUPPER (ch)) + || (is_xdigit && ISXDIGIT (ch))) + SET_LIST_BIT (ch); + } + had_char_class = true; + } + else + { + c1++; + while (c1--) + PATUNFETCH; + SET_LIST_BIT ('['); + SET_LIST_BIT (':'); + had_char_class = false; + } + } + else + { + had_char_class = false; + SET_LIST_BIT (c); + } + } + + /* Discard any (non)matching list bytes that are all 0 at the + end of the map. Decrease the map-length byte too. */ + while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) + b[-1]--; + b += b[-1]; + } + break; + + + case '(': + if (syntax & RE_NO_BK_PARENS) + goto handle_open; + else + goto normal_char; + + + case ')': + if (syntax & RE_NO_BK_PARENS) + goto handle_close; + else + goto normal_char; + + + case '\n': + if (syntax & RE_NEWLINE_ALT) + goto handle_alt; + else + goto normal_char; + + + case '|': + if (syntax & RE_NO_BK_VBAR) + goto handle_alt; + else + goto normal_char; + + + case '{': + if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) + goto handle_interval; + else + goto normal_char; + + + case '\\': + if (p == pend) return REG_EESCAPE; + + /* Do not translate the character after the \, so that we can + distinguish, e.g., \B from \b, even if we normally would + translate, e.g., B to b. */ + PATFETCH_RAW (c); + + switch (c) + { + case '(': + if (syntax & RE_NO_BK_PARENS) + goto normal_backslash; + + handle_open: + bufp->re_nsub++; + regnum++; + + if (COMPILE_STACK_FULL) + { + RETALLOC (compile_stack.stack, compile_stack.size << 1, + compile_stack_elt_t); + if (compile_stack.stack == NULL) return REG_ESPACE; + + compile_stack.size <<= 1; + } + + /* These are the values to restore when we hit end of this + group. They are all relative offsets, so that if the + whole pattern moves because of realloc, they will still + be valid. */ + COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; + COMPILE_STACK_TOP.fixup_alt_jump + = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; + COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; + COMPILE_STACK_TOP.regnum = regnum; + + /* We will eventually replace the 0 with the number of + groups inner to this one. But do not push a + start_memory for groups beyond the last one we can + represent in the compiled pattern. */ + if (regnum <= MAX_REGNUM) + { + COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; + BUF_PUSH_3 (start_memory, regnum, 0); + } + + compile_stack.avail++; + + fixup_alt_jump = 0; + laststart = 0; + begalt = b; + /* If we've reached MAX_REGNUM groups, then this open + won't actually generate any code, so we'll have to + clear pending_exact explicitly. */ + pending_exact = 0; + break; + + + case ')': + if (syntax & RE_NO_BK_PARENS) goto normal_backslash; + + if (COMPILE_STACK_EMPTY) + if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) + goto normal_backslash; + else + return REG_ERPAREN; + + handle_close: + if (fixup_alt_jump) + { /* Push a dummy failure point at the end of the + alternative for a possible future + `pop_failure_jump' to pop. See comments at + `push_dummy_failure' in `re_match_2'. */ + BUF_PUSH (push_dummy_failure); + + /* We allocated space for this jump when we assigned + to `fixup_alt_jump', in the `handle_alt' case below. */ + STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); + } + + /* See similar code for backslashed left paren above. */ + if (COMPILE_STACK_EMPTY) + if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) + goto normal_char; + else + return REG_ERPAREN; + + /* Since we just checked for an empty stack above, this + ``can't happen''. */ + assert (compile_stack.avail != 0); + { + /* We don't just want to restore into `regnum', because + later groups should continue to be numbered higher, + as in `(ab)c(de)' -- the second group is #2. */ + regnum_t this_group_regnum; + + compile_stack.avail--; + begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; + fixup_alt_jump + = COMPILE_STACK_TOP.fixup_alt_jump + ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 + : 0; + laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; + this_group_regnum = COMPILE_STACK_TOP.regnum; + /* If we've reached MAX_REGNUM groups, then this open + won't actually generate any code, so we'll have to + clear pending_exact explicitly. */ + pending_exact = 0; + + /* We're at the end of the group, so now we know how many + groups were inside this one. */ + if (this_group_regnum <= MAX_REGNUM) + { + unsigned char *inner_group_loc + = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; + + *inner_group_loc = regnum - this_group_regnum; + BUF_PUSH_3 (stop_memory, this_group_regnum, + regnum - this_group_regnum); + } + } + break; + + + case '|': /* `\|'. */ + if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) + goto normal_backslash; + handle_alt: + if (syntax & RE_LIMITED_OPS) + goto normal_char; + + /* Insert before the previous alternative a jump which + jumps to this alternative if the former fails. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (on_failure_jump, begalt, b + 6); + pending_exact = 0; + b += 3; + + /* The alternative before this one has a jump after it + which gets executed if it gets matched. Adjust that + jump so it will jump to this alternative's analogous + jump (put in below, which in turn will jump to the next + (if any) alternative's such jump, etc.). The last such + jump jumps to the correct final destination. A picture: + _____ _____ + | | | | + | v | v + a | b | c + + If we are at `b', then fixup_alt_jump right now points to a + three-byte space after `a'. We'll put in the jump, set + fixup_alt_jump to right after `b', and leave behind three + bytes which we'll fill in when we get to after `c'. */ + + if (fixup_alt_jump) + STORE_JUMP (jump_past_alt, fixup_alt_jump, b); + + /* Mark and leave space for a jump after this alternative, + to be filled in later either by next alternative or + when know we're at the end of a series of alternatives. */ + fixup_alt_jump = b; + GET_BUFFER_SPACE (3); + b += 3; + + laststart = 0; + begalt = b; + break; + + + case '{': + /* If \{ is a literal. */ + if (!(syntax & RE_INTERVALS) + /* If we're at `\{' and it's not the open-interval + operator. */ + || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + || (p - 2 == pattern && p == pend)) + goto normal_backslash; + + handle_interval: + { + /* If got here, then the syntax allows intervals. */ + + /* At least (most) this many matches must be made. */ + int lower_bound = -1, upper_bound = -1; + + beg_interval = p - 1; + + if (p == pend) + { + if (syntax & RE_NO_BK_BRACES) + goto unfetch_interval; + else + return REG_EBRACE; + } + + GET_UNSIGNED_NUMBER (lower_bound); + + if (c == ',') + { + GET_UNSIGNED_NUMBER (upper_bound); + if (upper_bound < 0) upper_bound = RE_DUP_MAX; + } + else + /* Interval such as `{1}' => match exactly once. */ + upper_bound = lower_bound; + + if (lower_bound < 0 || upper_bound > RE_DUP_MAX + || lower_bound > upper_bound) + { + if (syntax & RE_NO_BK_BRACES) + goto unfetch_interval; + else + return REG_BADBR; + } + + if (!(syntax & RE_NO_BK_BRACES)) + { + if (c != '\\') return REG_EBRACE; + + PATFETCH (c); + } + + if (c != '}') + { + if (syntax & RE_NO_BK_BRACES) + goto unfetch_interval; + else + return REG_BADBR; + } + + /* We just parsed a valid interval. */ + + /* If it's invalid to have no preceding re. */ + if (!laststart) + { + if (syntax & RE_CONTEXT_INVALID_OPS) + return REG_BADRPT; + else if (syntax & RE_CONTEXT_INDEP_OPS) + laststart = b; + else + goto unfetch_interval; + } + + /* If the upper bound is zero, don't want to succeed at + all; jump from `laststart' to `b + 3', which will be + the end of the buffer after we insert the jump. */ + if (upper_bound == 0) + { + GET_BUFFER_SPACE (3); + INSERT_JUMP (jump, laststart, b + 3); + b += 3; + } + + /* Otherwise, we have a nontrivial interval. When + we're all done, the pattern will look like: + set_number_at + set_number_at + succeed_n + + jump_n + (The upper bound and `jump_n' are omitted if + `upper_bound' is 1, though.) */ + else + { /* If the upper bound is > 1, we need to insert + more at the end of the loop. */ + unsigned nbytes = 10 + (upper_bound > 1) * 10; + + GET_BUFFER_SPACE (nbytes); + + /* Initialize lower bound of the `succeed_n', even + though it will be set during matching by its + attendant `set_number_at' (inserted next), + because `re_compile_fastmap' needs to know. + Jump to the `jump_n' we might insert below. */ + INSERT_JUMP2 (succeed_n, laststart, + b + 5 + (upper_bound > 1) * 5, + lower_bound); + b += 5; + + /* Code to initialize the lower bound. Insert + before the `succeed_n'. The `5' is the last two + bytes of this `set_number_at', plus 3 bytes of + the following `succeed_n'. */ + insert_op2 (set_number_at, laststart, 5, lower_bound, b); + b += 5; + + if (upper_bound > 1) + { /* More than one repetition is allowed, so + append a backward jump to the `succeed_n' + that starts this interval. + + When we've reached this during matching, + we'll have matched the interval once, so + jump back only `upper_bound - 1' times. */ + STORE_JUMP2 (jump_n, b, laststart + 5, + upper_bound - 1); + b += 5; + + /* The location we want to set is the second + parameter of the `jump_n'; that is `b-2' as + an absolute address. `laststart' will be + the `set_number_at' we're about to insert; + `laststart+3' the number to set, the source + for the relative address. But we are + inserting into the middle of the pattern -- + so everything is getting moved up by 5. + Conclusion: (b - 2) - (laststart + 3) + 5, + i.e., b - laststart. + + We insert this at the beginning of the loop + so that if we fail during matching, we'll + reinitialize the bounds. */ + insert_op2 (set_number_at, laststart, b - laststart, + upper_bound - 1, b); + b += 5; + } + } + pending_exact = 0; + beg_interval = NULL; + } + break; + + unfetch_interval: + /* If an invalid interval, match the characters as literals. */ + assert (beg_interval); + p = beg_interval; + beg_interval = NULL; + + /* normal_char and normal_backslash need `c'. */ + PATFETCH (c); + + if (!(syntax & RE_NO_BK_BRACES)) + { + if (p > pattern && p[-1] == '\\') + goto normal_backslash; + } + goto normal_char; + +#ifdef emacs + /* There is no way to specify the before_dot and after_dot + operators. rms says this is ok. --karl */ + case '=': + BUF_PUSH (at_dot); + break; + + case 's': + laststart = b; + PATFETCH (c); + BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); + break; + + case 'S': + laststart = b; + PATFETCH (c); + BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); + break; +#endif /* emacs */ + + + case 'w': + laststart = b; + BUF_PUSH (wordchar); + break; + + + case 'W': + laststart = b; + BUF_PUSH (notwordchar); + break; + + + case '<': + BUF_PUSH (wordbeg); + break; + + case '>': + BUF_PUSH (wordend); + break; + + case 'b': + BUF_PUSH (wordbound); + break; + + case 'B': + BUF_PUSH (notwordbound); + break; + + case '`': + BUF_PUSH (begbuf); + break; + + case '\'': + BUF_PUSH (endbuf); + break; + + case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + if (syntax & RE_NO_BK_REFS) + goto normal_char; + + c1 = c - '0'; + + if (c1 > regnum) + return REG_ESUBREG; + + /* Can't back reference to a subexpression if inside of it. */ + if (group_in_compile_stack (compile_stack, c1)) + goto normal_char; + + laststart = b; + BUF_PUSH_2 (duplicate, c1); + break; + + + case '+': + case '?': + if (syntax & RE_BK_PLUS_QM) + goto handle_plus; + else + goto normal_backslash; + + default: + normal_backslash: + /* You might think it would be useful for \ to mean + not to translate; but if we don't translate it + it will never match anything. */ + c = TRANSLATE (c); + goto normal_char; + } + break; + + + default: + /* Expects the character in `c'. */ + normal_char: + /* If no exactn currently being built. */ + if (!pending_exact + + /* If last exactn not at current position. */ + || pending_exact + *pending_exact + 1 != b + + /* We have only one byte following the exactn for the count. */ + || *pending_exact == (1 << BYTEWIDTH) - 1 + + /* If followed by a repetition operator. */ + || *p == '*' || *p == '^' + || ((syntax & RE_BK_PLUS_QM) + ? *p == '\\' && (p[1] == '+' || p[1] == '?') + : (*p == '+' || *p == '?')) + || ((syntax & RE_INTERVALS) + && ((syntax & RE_NO_BK_BRACES) + ? *p == '{' + : (p[0] == '\\' && p[1] == '{')))) + { + /* Start building a new exactn. */ + + laststart = b; + + BUF_PUSH_2 (exactn, 0); + pending_exact = b - 1; + } + + BUF_PUSH (c); + (*pending_exact)++; + break; + } /* switch (c) */ + } /* while p != pend */ + + + /* Through the pattern now. */ + + if (fixup_alt_jump) + STORE_JUMP (jump_past_alt, fixup_alt_jump, b); + + if (!COMPILE_STACK_EMPTY) + return REG_EPAREN; + + free (compile_stack.stack); + + /* We have succeeded; set the length of the buffer. */ + bufp->used = b - bufp->buffer; + +#ifdef DEBUG + if (debug) + { + DEBUG_PRINT1 ("\nCompiled pattern: "); + print_compiled_pattern (bufp); + } +#endif /* DEBUG */ + + return REG_NOERROR; +} /* regex_compile */ + +/* Subroutines for `regex_compile'. */ + +/* Store OP at LOC followed by two-byte integer parameter ARG. */ + +static void +store_op1 (op, loc, arg) + re_opcode_t op; + unsigned char *loc; + int arg; +{ + *loc = (unsigned char) op; + STORE_NUMBER (loc + 1, arg); +} + + +/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ + +static void +store_op2 (op, loc, arg1, arg2) + re_opcode_t op; + unsigned char *loc; + int arg1, arg2; +{ + *loc = (unsigned char) op; + STORE_NUMBER (loc + 1, arg1); + STORE_NUMBER (loc + 3, arg2); +} + + +/* Copy the bytes from LOC to END to open up three bytes of space at LOC + for OP followed by two-byte integer parameter ARG. */ + +static void +insert_op1 (op, loc, arg, end) + re_opcode_t op; + unsigned char *loc; + int arg; + unsigned char *end; +{ + register unsigned char *pfrom = end; + register unsigned char *pto = end + 3; + + while (pfrom != loc) + *--pto = *--pfrom; + + store_op1 (op, loc, arg); +} + + +/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ + +static void +insert_op2 (op, loc, arg1, arg2, end) + re_opcode_t op; + unsigned char *loc; + int arg1, arg2; + unsigned char *end; +{ + register unsigned char *pfrom = end; + register unsigned char *pto = end + 5; + + while (pfrom != loc) + *--pto = *--pfrom; + + store_op2 (op, loc, arg1, arg2); +} + + +/* P points to just after a ^ in PATTERN. Return true if that ^ comes + after an alternative or a begin-subexpression. We assume there is at + least one character before the ^. */ + +static boolean +at_begline_loc_p (pattern, p, syntax) + const char *pattern, *p; + reg_syntax_t syntax; +{ + const char *prev = p - 2; + boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; + + return + /* After a subexpression? */ + (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) + /* After an alternative? */ + || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); +} + + +/* The dual of at_begline_loc_p. This one is for $. We assume there is + at least one character after the $, i.e., `P < PEND'. */ + +static boolean +at_endline_loc_p (p, pend, syntax) + const char *p, *pend; + int syntax; +{ + const char *next = p; + boolean next_backslash = *next == '\\'; + const char *next_next = p + 1 < pend ? p + 1 : NULL; + + return + /* Before a subexpression? */ + (syntax & RE_NO_BK_PARENS ? *next == ')' + : next_backslash && next_next && *next_next == ')') + /* Before an alternative? */ + || (syntax & RE_NO_BK_VBAR ? *next == '|' + : next_backslash && next_next && *next_next == '|'); +} + + +/* Returns true if REGNUM is in one of COMPILE_STACK's elements and + false if it's not. */ + +static boolean +group_in_compile_stack (compile_stack, regnum) + compile_stack_type compile_stack; + regnum_t regnum; +{ + int this_element; + + for (this_element = compile_stack.avail - 1; + this_element >= 0; + this_element--) + if (compile_stack.stack[this_element].regnum == regnum) + return true; + + return false; +} + + +/* Read the ending character of a range (in a bracket expression) from the + uncompiled pattern *P_PTR (which ends at PEND). We assume the + starting character is in `P[-2]'. (`P[-1]' is the character `-'.) + Then we set the translation of all bits between the starting and + ending characters (inclusive) in the compiled pattern B. + + Return an error code. + + We use these short variable names so we can use the same macros as + `regex_compile' itself. */ + +static reg_errcode_t +compile_range (p_ptr, pend, translate, syntax, b) + const char **p_ptr, *pend; + char *translate; + reg_syntax_t syntax; + unsigned char *b; +{ + unsigned this_char; + + const char *p = *p_ptr; + int range_start, range_end; + + if (p == pend) + return REG_ERANGE; + + /* Even though the pattern is a signed `char *', we need to fetch + with unsigned char *'s; if the high bit of the pattern character + is set, the range endpoints will be negative if we fetch using a + signed char *. + + We also want to fetch the endpoints without translating them; the + appropriate translation is done in the bit-setting loop below. */ + range_start = ((unsigned char *) p)[-2]; + range_end = ((unsigned char *) p)[0]; + + /* Have to increment the pointer into the pattern string, so the + caller isn't still at the ending character. */ + (*p_ptr)++; + + /* If the start is after the end, the range is empty. */ + if (range_start > range_end) + return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; + + /* Here we see why `this_char' has to be larger than an `unsigned + char' -- the range is inclusive, so if `range_end' == 0xff + (assuming 8-bit characters), we would otherwise go into an infinite + loop, since all characters <= 0xff. */ + for (this_char = range_start; this_char <= range_end; this_char++) + { + SET_LIST_BIT (TRANSLATE (this_char)); + } + + return REG_NOERROR; +} + +/* Failure stack declarations and macros; both re_compile_fastmap and + re_match_2 use a failure stack. These have to be macros because of + REGEX_ALLOCATE. */ + + +/* Number of failure points for which to initially allocate space + when matching. If this number is exceeded, we allocate more + space, so it is not a hard limit. */ +#ifndef INIT_FAILURE_ALLOC +#define INIT_FAILURE_ALLOC 5 +#endif + +/* Roughly the maximum number of failure points on the stack. Would be + exactly that if always used MAX_FAILURE_SPACE each time we failed. + This is a variable only so users of regex can assign to it; we never + change it ourselves. */ +int re_max_failures = 2000; + +typedef const unsigned char *fail_stack_elt_t; + +typedef struct +{ + fail_stack_elt_t *stack; + unsigned size; + unsigned avail; /* Offset of next open position. */ +} fail_stack_type; + +#define FAIL_STACK_EMPTY() (fail_stack.avail == 0) +#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) +#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) +#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail]) + + +/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */ + +#define INIT_FAIL_STACK() \ + do { \ + fail_stack.stack = (fail_stack_elt_t *) \ + REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ + \ + if (fail_stack.stack == NULL) \ + return -2; \ + \ + fail_stack.size = INIT_FAILURE_ALLOC; \ + fail_stack.avail = 0; \ + } while (0) + + +/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. + + Return 1 if succeeds, and 0 if either ran out of memory + allocating space for it or it was already too large. + + REGEX_REALLOCATE requires `destination' be declared. */ + +#define DOUBLE_FAIL_STACK(fail_stack) \ + ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ + ? 0 \ + : ((fail_stack).stack = (fail_stack_elt_t *) \ + REGEX_REALLOCATE ((fail_stack).stack, \ + (fail_stack).size * sizeof (fail_stack_elt_t), \ + ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ + \ + (fail_stack).stack == NULL \ + ? 0 \ + : ((fail_stack).size <<= 1, \ + 1))) + + +/* Push PATTERN_OP on FAIL_STACK. + + Return 1 if was able to do so and 0 if ran out of memory allocating + space to do so. */ +#define PUSH_PATTERN_OP(pattern_op, fail_stack) \ + ((FAIL_STACK_FULL () \ + && !DOUBLE_FAIL_STACK (fail_stack)) \ + ? 0 \ + : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \ + 1)) + +/* This pushes an item onto the failure stack. Must be a four-byte + value. Assumes the variable `fail_stack'. Probably should only + be called from within `PUSH_FAILURE_POINT'. */ +#define PUSH_FAILURE_ITEM(item) \ + fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item + +/* The complement operation. Assumes `fail_stack' is nonempty. */ +#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail] + +/* Used to omit pushing failure point id's when we're not debugging. */ +#ifdef DEBUG +#define DEBUG_PUSH PUSH_FAILURE_ITEM +#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM () +#else +#define DEBUG_PUSH(item) +#define DEBUG_POP(item_addr) +#endif + + +/* Push the information about the state we will need + if we ever fail back to it. + + Requires variables fail_stack, regstart, regend, reg_info, and + num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be + declared. + + Does `return FAILURE_CODE' if runs out of memory. */ + +#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ + do { \ + char *destination; \ + /* Must be int, so when we don't save any registers, the arithmetic \ + of 0 + -1 isn't done as unsigned. */ \ + int this_reg; \ + \ + DEBUG_STATEMENT (failure_id++); \ + DEBUG_STATEMENT (nfailure_points_pushed++); \ + DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ + DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ + DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ + \ + DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ + DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ + \ + /* Ensure we have enough space allocated for what we will push. */ \ + while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ + { \ + if (!DOUBLE_FAIL_STACK (fail_stack)) \ + return failure_code; \ + \ + DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ + (fail_stack).size); \ + DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ + } \ + \ + /* Push the info, starting with the registers. */ \ + DEBUG_PRINT1 ("\n"); \ + \ + for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ + this_reg++) \ + { \ + DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \ + DEBUG_STATEMENT (num_regs_pushed++); \ + \ + DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ + PUSH_FAILURE_ITEM (regstart[this_reg]); \ + \ + DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ + PUSH_FAILURE_ITEM (regend[this_reg]); \ + \ + DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \ + DEBUG_PRINT2 (" match_null=%d", \ + REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ + DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ + DEBUG_PRINT2 (" matched_something=%d", \ + MATCHED_SOMETHING (reg_info[this_reg])); \ + DEBUG_PRINT2 (" ever_matched=%d", \ + EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ + DEBUG_PRINT1 ("\n"); \ + PUSH_FAILURE_ITEM (reg_info[this_reg].word); \ + } \ + \ + DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\ + PUSH_FAILURE_ITEM (lowest_active_reg); \ + \ + DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\ + PUSH_FAILURE_ITEM (highest_active_reg); \ + \ + DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \ + DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ + PUSH_FAILURE_ITEM (pattern_place); \ + \ + DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \ + DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ + size2); \ + DEBUG_PRINT1 ("'\n"); \ + PUSH_FAILURE_ITEM (string_place); \ + \ + DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ + DEBUG_PUSH (failure_id); \ + } while (0) + +/* This is the number of items that are pushed and popped on the stack + for each register. */ +#define NUM_REG_ITEMS 3 + +/* Individual items aside from the registers. */ +#ifdef DEBUG +#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ +#else +#define NUM_NONREG_ITEMS 4 +#endif + +/* We push at most this many items on the stack. */ +#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS) + +/* We actually push this many items. */ +#define NUM_FAILURE_ITEMS \ + ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ + + NUM_NONREG_ITEMS) + +/* How many items can still be added to the stack without overflowing it. */ +#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) + + +/* Pops what PUSH_FAIL_STACK pushes. + + We restore into the parameters, all of which should be lvalues: + STR -- the saved data position. + PAT -- the saved pattern position. + LOW_REG, HIGH_REG -- the highest and lowest active registers. + REGSTART, REGEND -- arrays of string positions. + REG_INFO -- array of information about each subexpression. + + Also assumes the variables `fail_stack' and (if debugging), `bufp', + `pend', `string1', `size1', `string2', and `size2'. */ + +#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ +{ \ + DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \ + int this_reg; \ + const unsigned char *string_temp; \ + \ + assert (!FAIL_STACK_EMPTY ()); \ + \ + /* Remove failure points and point to how many regs pushed. */ \ + DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ + DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ + DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ + \ + assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ + \ + DEBUG_POP (&failure_id); \ + DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ + \ + /* If the saved string location is NULL, it came from an \ + on_failure_keep_string_jump opcode, and we want to throw away the \ + saved NULL, thus retaining our current position in the string. */ \ + string_temp = POP_FAILURE_ITEM (); \ + if (string_temp != NULL) \ + str = (const char *) string_temp; \ + \ + DEBUG_PRINT2 (" Popping string 0x%x: `", str); \ + DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ + DEBUG_PRINT1 ("'\n"); \ + \ + pat = (unsigned char *) POP_FAILURE_ITEM (); \ + DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \ + DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ + \ + /* Restore register info. */ \ + high_reg = (unsigned) POP_FAILURE_ITEM (); \ + DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \ + \ + low_reg = (unsigned) POP_FAILURE_ITEM (); \ + DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ + \ + for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ + { \ + DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ + \ + reg_info[this_reg].word = POP_FAILURE_ITEM (); \ + DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \ + \ + regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \ + DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ + \ + regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \ + DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ + } \ + \ + DEBUG_STATEMENT (nfailure_points_popped++); \ +} /* POP_FAILURE_POINT */ + +/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in + BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible + characters can start a string that matches the pattern. This fastmap + is used by re_search to skip quickly over impossible starting points. + + The caller must supply the address of a (1 << BYTEWIDTH)-byte data + area as BUFP->fastmap. + + We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in + the pattern buffer. + + Returns 0 if we succeed, -2 if an internal error. */ + +int +re_compile_fastmap (bufp) + struct re_pattern_buffer *bufp; +{ + int j, k; + fail_stack_type fail_stack; +#ifndef REGEX_MALLOC + char *destination; +#endif + /* We don't push any register information onto the failure stack. */ + unsigned num_regs = 0; + + register char *fastmap = bufp->fastmap; + unsigned char *pattern = bufp->buffer; + unsigned long size = bufp->used; + const unsigned char *p = pattern; + register unsigned char *pend = pattern + size; + + /* Assume that each path through the pattern can be null until + proven otherwise. We set this false at the bottom of switch + statement, to which we get only if a particular path doesn't + match the empty string. */ + boolean path_can_be_null = true; + + /* We aren't doing a `succeed_n' to begin with. */ + boolean succeed_n_p = false; + + assert (fastmap != NULL && p != NULL); + + INIT_FAIL_STACK (); + bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ + bufp->fastmap_accurate = 1; /* It will be when we're done. */ + bufp->can_be_null = 0; + + while (p != pend || !FAIL_STACK_EMPTY ()) + { + if (p == pend) + { + bufp->can_be_null |= path_can_be_null; + + /* Reset for next path. */ + path_can_be_null = true; + + p = fail_stack.stack[--fail_stack.avail]; + } + + /* We should never be about to go beyond the end of the pattern. */ + assert (p < pend); + +#ifdef SWITCH_ENUM_BUG + switch ((int) ((re_opcode_t) *p++)) +#else + switch ((re_opcode_t) *p++) +#endif + { + + /* I guess the idea here is to simply not bother with a fastmap + if a backreference is used, since it's too hard to figure out + the fastmap for the corresponding group. Setting + `can_be_null' stops `re_search_2' from using the fastmap, so + that is all we do. */ + case duplicate: + bufp->can_be_null = 1; + return 0; + + + /* Following are the cases which match a character. These end + with `break'. */ + + case exactn: + fastmap[p[1]] = 1; + break; + + + case charset: + for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) + if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) + fastmap[j] = 1; + break; + + + case charset_not: + /* Chars beyond end of map must be allowed. */ + for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) + fastmap[j] = 1; + + for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) + if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) + fastmap[j] = 1; + break; + + + case wordchar: + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) == Sword) + fastmap[j] = 1; + break; + + + case notwordchar: + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) != Sword) + fastmap[j] = 1; + break; + + + case anychar: + /* `.' matches anything ... */ + for (j = 0; j < (1 << BYTEWIDTH); j++) + fastmap[j] = 1; + + /* ... except perhaps newline. */ + if (!(bufp->syntax & RE_DOT_NEWLINE)) + fastmap['\n'] = 0; + + /* Return if we have already set `can_be_null'; if we have, + then the fastmap is irrelevant. Something's wrong here. */ + else if (bufp->can_be_null) + return 0; + + /* Otherwise, have to check alternative paths. */ + break; + + +#ifdef emacs + case syntaxspec: + k = *p++; + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) == (enum syntaxcode) k) + fastmap[j] = 1; + break; + + + case notsyntaxspec: + k = *p++; + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) != (enum syntaxcode) k) + fastmap[j] = 1; + break; + + + /* All cases after this match the empty string. These end with + `continue'. */ + + + case before_dot: + case at_dot: + case after_dot: + continue; +#endif /* not emacs */ + + + case no_op: + case begline: + case endline: + case begbuf: + case endbuf: + case wordbound: + case notwordbound: + case wordbeg: + case wordend: + case push_dummy_failure: + continue; + + + case jump_n: + case pop_failure_jump: + case maybe_pop_jump: + case jump: + case jump_past_alt: + case dummy_failure_jump: + EXTRACT_NUMBER_AND_INCR (j, p); + p += j; + if (j > 0) + continue; + + /* Jump backward implies we just went through the body of a + loop and matched nothing. Opcode jumped to should be + `on_failure_jump' or `succeed_n'. Just treat it like an + ordinary jump. For a * loop, it has pushed its failure + point already; if so, discard that as redundant. */ + if ((re_opcode_t) *p != on_failure_jump + && (re_opcode_t) *p != succeed_n) + continue; + + p++; + EXTRACT_NUMBER_AND_INCR (j, p); + p += j; + + /* If what's on the stack is where we are now, pop it. */ + if (!FAIL_STACK_EMPTY () + && fail_stack.stack[fail_stack.avail - 1] == p) + fail_stack.avail--; + + continue; + + + case on_failure_jump: + case on_failure_keep_string_jump: + handle_on_failure_jump: + EXTRACT_NUMBER_AND_INCR (j, p); + + /* For some patterns, e.g., `(a?)?', `p+j' here points to the + end of the pattern. We don't want to push such a point, + since when we restore it above, entering the switch will + increment `p' past the end of the pattern. We don't need + to push such a point since we obviously won't find any more + fastmap entries beyond `pend'. Such a pattern can match + the null string, though. */ + if (p + j < pend) + { + if (!PUSH_PATTERN_OP (p + j, fail_stack)) + return -2; + } + else + bufp->can_be_null = 1; + + if (succeed_n_p) + { + EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ + succeed_n_p = false; + } + + continue; + + + case succeed_n: + /* Get to the number of times to succeed. */ + p += 2; + + /* Increment p past the n for when k != 0. */ + EXTRACT_NUMBER_AND_INCR (k, p); + if (k == 0) + { + p -= 4; + succeed_n_p = true; /* Spaghetti code alert. */ + goto handle_on_failure_jump; + } + continue; + + + case set_number_at: + p += 4; + continue; + + + case start_memory: + case stop_memory: + p += 2; + continue; + + + default: + abort (); /* We have listed all the cases. */ + } /* switch *p++ */ + + /* Getting here means we have found the possible starting + characters for one path of the pattern -- and that the empty + string does not match. We need not follow this path further. + Instead, look at the next alternative (remembered on the + stack), or quit if no more. The test at the top of the loop + does these things. */ + path_can_be_null = false; + p = pend; + } /* while p */ + + /* Set `can_be_null' for the last path (also the first path, if the + pattern is empty). */ + bufp->can_be_null |= path_can_be_null; + return 0; +} /* re_compile_fastmap */ + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use + this memory for recording register information. STARTS and ENDS + must be allocated using the malloc library routine, and must each + be at least NUM_REGS * sizeof (regoff_t) bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ + +void +re_set_registers (bufp, regs, num_regs, starts, ends) + struct re_pattern_buffer *bufp; + struct re_registers *regs; + unsigned num_regs; + regoff_t *starts, *ends; +{ + if (num_regs) + { + bufp->regs_allocated = REGS_REALLOCATE; + regs->num_regs = num_regs; + regs->start = starts; + regs->end = ends; + } + else + { + bufp->regs_allocated = REGS_UNALLOCATED; + regs->num_regs = 0; + regs->start = regs->end = (regoff_t) 0; + } +} + +/* Searching routines. */ + +/* Like re_search_2, below, but only one string is specified, and + doesn't let you say where to stop matching. */ + +int +re_search (bufp, string, size, startpos, range, regs) + struct re_pattern_buffer *bufp; + const char *string; + int size, startpos, range; + struct re_registers *regs; +{ + return re_search_2 (bufp, NULL, 0, string, size, startpos, range, + regs, size); +} + + +/* Using the compiled pattern in BUFP->buffer, first tries to match the + virtual concatenation of STRING1 and STRING2, starting first at index + STARTPOS, then at STARTPOS + 1, and so on. + + STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. + + RANGE is how far to scan while trying to match. RANGE = 0 means try + only at STARTPOS; in general, the last start tried is STARTPOS + + RANGE. + + In REGS, return the indices of the virtual concatenation of STRING1 + and STRING2 that matched the entire BUFP->buffer and its contained + subexpressions. + + Do not consider matching one past the index STOP in the virtual + concatenation of STRING1 and STRING2. + + We return either the position in the strings at which the match was + found, -1 if no match, or -2 if error (such as failure + stack overflow). */ + +int +re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int size1, size2; + int startpos; + int range; + struct re_registers *regs; + int stop; +{ + int val; + register char *fastmap = bufp->fastmap; + register char *translate = bufp->translate; + int total_size = size1 + size2; + int endpos = startpos + range; + + /* Check for out-of-range STARTPOS. */ + if (startpos < 0 || startpos > total_size) + return -1; + + /* Fix up RANGE if it might eventually take us outside + the virtual concatenation of STRING1 and STRING2. */ + if (endpos < -1) + range = -1 - startpos; + else if (endpos > total_size) + range = total_size - startpos; + + /* If the search isn't to be a backwards one, don't waste time in a + search for a pattern that must be anchored. */ + if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) + { + if (startpos > 0) + return -1; + else + range = 1; + } + + /* Update the fastmap now if not correct already. */ + if (fastmap && !bufp->fastmap_accurate) + if (re_compile_fastmap (bufp) == -2) + return -2; + + /* Loop through the string, looking for a place to start matching. */ + for (;;) + { + /* If a fastmap is supplied, skip quickly over characters that + cannot be the start of a match. If the pattern can match the + null string, however, we don't need to skip characters; we want + the first null string. */ + if (fastmap && startpos < total_size && !bufp->can_be_null) + { + if (range > 0) /* Searching forwards. */ + { + register const char *d; + register int lim = 0; + int irange = range; + + if (startpos < size1 && startpos + range >= size1) + lim = range - (size1 - startpos); + + d = (startpos >= size1 ? string2 - size1 : string1) + startpos; + + /* Written out as an if-else to avoid testing `translate' + inside the loop. */ + if (translate) + while (range > lim + && !fastmap[(unsigned char) + translate[(unsigned char) *d++]]) + range--; + else + while (range > lim && !fastmap[(unsigned char) *d++]) + range--; + + startpos += irange - range; + } + else /* Searching backwards. */ + { + register char c = (size1 == 0 || startpos >= size1 + ? string2[startpos - size1] + : string1[startpos]); + + if (!fastmap[(unsigned char) TRANSLATE (c)]) + goto advance; + } + } + + /* If can't match the null string, and that's all we have left, fail. */ + if (range >= 0 && startpos == total_size && fastmap + && !bufp->can_be_null) + return -1; + + val = re_match_2 (bufp, string1, size1, string2, size2, + startpos, regs, stop); + if (val >= 0) + return startpos; + + if (val == -2) + return -2; + + advance: + if (!range) + break; + else if (range > 0) + { + range--; + startpos++; + } + else + { + range++; + startpos--; + } + } + return -1; +} /* re_search_2 */ + +/* Declarations and macros for re_match_2. */ + +static int bcmp_translate (); +static boolean alt_match_null_string_p (), + common_op_match_null_string_p (), + group_match_null_string_p (); + +/* Structure for per-register (a.k.a. per-group) information. + This must not be longer than one word, because we push this value + onto the failure stack. Other register information, such as the + starting and ending positions (which are addresses), and the list of + inner groups (which is a bits list) are maintained in separate + variables. + + We are making a (strictly speaking) nonportable assumption here: that + the compiler will pack our bit fields into something that fits into + the type of `word', i.e., is something that fits into one item on the + failure stack. */ +typedef union +{ + fail_stack_elt_t word; + struct + { + /* This field is one if this group can match the empty string, + zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ +#define MATCH_NULL_UNSET_VALUE 3 + unsigned match_null_string_p : 2; + unsigned is_active : 1; + unsigned matched_something : 1; + unsigned ever_matched_something : 1; + } bits; +} register_info_type; + +#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) +#define IS_ACTIVE(R) ((R).bits.is_active) +#define MATCHED_SOMETHING(R) ((R).bits.matched_something) +#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) + + +/* Call this when have matched a real character; it sets `matched' flags + for the subexpressions which we are currently inside. Also records + that those subexprs have matched. */ +#define SET_REGS_MATCHED() \ + do \ + { \ + unsigned r; \ + for (r = lowest_active_reg; r <= highest_active_reg; r++) \ + { \ + MATCHED_SOMETHING (reg_info[r]) \ + = EVER_MATCHED_SOMETHING (reg_info[r]) \ + = 1; \ + } \ + } \ + while (0) + + +/* This converts PTR, a pointer into one of the search strings `string1' + and `string2' into an offset from the beginning of that string. */ +#define POINTER_TO_OFFSET(ptr) \ + (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1) + +/* Registers are set to a sentinel when they haven't yet matched. */ +#define REG_UNSET_VALUE ((char *) -1) +#define REG_UNSET(e) ((e) == REG_UNSET_VALUE) + + +/* Macros for dealing with the split strings in re_match_2. */ + +#define MATCHING_IN_FIRST_STRING (dend == end_match_1) + +/* Call before fetching a character with *d. This switches over to + string2 if necessary. */ +#define PREFETCH() \ + while (d == dend) \ + { \ + /* End of string2 => fail. */ \ + if (dend == end_match_2) \ + goto fail; \ + /* End of string1 => advance to string2. */ \ + d = string2; \ + dend = end_match_2; \ + } + + +/* Test if at very beginning or at very end of the virtual concatenation + of `string1' and `string2'. If only one string, it's `string2'. */ +#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) +#define AT_STRINGS_END(d) ((d) == end2) + + +/* Test if D points to a character which is word-constituent. We have + two special cases to check for: if past the end of string1, look at + the first character in string2; and if before the beginning of + string2, look at the last character in string1. */ +#define WORDCHAR_P(d) \ + (SYNTAX ((d) == end1 ? *string2 \ + : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ + == Sword) + +/* Test if the character before D and the one at D differ with respect + to being word-constituent. */ +#define AT_WORD_BOUNDARY(d) \ + (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ + || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) + + +/* Free everything we malloc. */ +#ifdef REGEX_MALLOC +#define FREE_VAR(var) if (var) free (var); var = NULL +#define FREE_VARIABLES() \ + do { \ + FREE_VAR (fail_stack.stack); \ + FREE_VAR (regstart); \ + FREE_VAR (regend); \ + FREE_VAR (old_regstart); \ + FREE_VAR (old_regend); \ + FREE_VAR (best_regstart); \ + FREE_VAR (best_regend); \ + FREE_VAR (reg_info); \ + FREE_VAR (reg_dummy); \ + FREE_VAR (reg_info_dummy); \ + } while (0) +#else /* not REGEX_MALLOC */ +/* Some MIPS systems (at least) want this to free alloca'd storage. */ +#define FREE_VARIABLES() alloca (0) +#endif /* not REGEX_MALLOC */ + + +/* These values must meet several constraints. They must not be valid + register values; since we have a limit of 255 registers (because + we use only one byte in the pattern for the register number), we can + use numbers larger than 255. They must differ by 1, because of + NUM_FAILURE_ITEMS above. And the value for the lowest register must + be larger than the value for the highest register, so we do not try + to actually save any registers when none are active. */ +#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) +#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) + +/* Matching routines. */ + +#ifndef emacs /* Emacs never uses this. */ +/* re_match is like re_match_2 except it takes only a single string. */ + +int +re_match (bufp, string, size, pos, regs) + struct re_pattern_buffer *bufp; + const char *string; + int size, pos; + struct re_registers *regs; + { + return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size); +} +#endif /* not emacs */ + + +/* re_match_2 matches the compiled pattern in BUFP against the + the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 + and SIZE2, respectively). We start matching at POS, and stop + matching at STOP. + + If REGS is non-null and the `no_sub' field of BUFP is nonzero, we + store offsets for the substring each group matched in REGS. See the + documentation for exactly how many groups we fill. + + We return -1 if no match, -2 if an internal error (such as the + failure stack overflowing). Otherwise, we return the length of the + matched substring. */ + +int +re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int size1, size2; + int pos; + struct re_registers *regs; + int stop; +{ + /* General temporaries. */ + int mcnt; + unsigned char *p1; + + /* Just past the end of the corresponding string. */ + const char *end1, *end2; + + /* Pointers into string1 and string2, just past the last characters in + each to consider matching. */ + const char *end_match_1, *end_match_2; + + /* Where we are in the data, and the end of the current string. */ + const char *d, *dend; + + /* Where we are in the pattern, and the end of the pattern. */ + unsigned char *p = bufp->buffer; + register unsigned char *pend = p + bufp->used; + + /* We use this to map every character in the string. */ + char *translate = bufp->translate; + + /* Failure point stack. Each place that can handle a failure further + down the line pushes a failure point on this stack. It consists of + restart, regend, and reg_info for all registers corresponding to + the subexpressions we're currently inside, plus the number of such + registers, and, finally, two char *'s. The first char * is where + to resume scanning the pattern; the second one is where to resume + scanning the strings. If the latter is zero, the failure point is + a ``dummy''; if a failure happens and the failure point is a dummy, + it gets discarded and the next next one is tried. */ + fail_stack_type fail_stack; +#ifdef DEBUG + static unsigned failure_id = 0; + unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; +#endif + + /* We fill all the registers internally, independent of what we + return, for use in backreferences. The number here includes + an element for register zero. */ + unsigned num_regs = bufp->re_nsub + 1; + + /* The currently active registers. */ + unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; + unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG; + + /* Information on the contents of registers. These are pointers into + the input strings; they record just what was matched (on this + attempt) by a subexpression part of the pattern, that is, the + regnum-th regstart pointer points to where in the pattern we began + matching and the regnum-th regend points to right after where we + stopped matching the regnum-th subexpression. (The zeroth register + keeps track of what the whole pattern matches.) */ + const char **regstart, **regend; + + /* If a group that's operated upon by a repetition operator fails to + match anything, then the register for its start will need to be + restored because it will have been set to wherever in the string we + are when we last see its open-group operator. Similarly for a + register's end. */ + const char **old_regstart, **old_regend; + + /* The is_active field of reg_info helps us keep track of which (possibly + nested) subexpressions we are currently in. The matched_something + field of reg_info[reg_num] helps us tell whether or not we have + matched any of the pattern so far this time through the reg_num-th + subexpression. These two fields get reset each time through any + loop their register is in. */ + register_info_type *reg_info; + + /* The following record the register info as found in the above + variables when we find a match better than any we've seen before. + This happens as we backtrack through the failure points, which in + turn happens only if we have not yet matched the entire string. */ + unsigned best_regs_set = false; + const char **best_regstart, **best_regend; + + /* Logically, this is `best_regend[0]'. But we don't want to have to + allocate space for that if we're not allocating space for anything + else (see below). Also, we never need info about register 0 for + any of the other register vectors, and it seems rather a kludge to + treat `best_regend' differently than the rest. So we keep track of + the end of the best match so far in a separate variable. We + initialize this to NULL so that when we backtrack the first time + and need to test it, it's not garbage. */ + const char *match_end = NULL; + + /* Used when we pop values we don't care about. */ + const char **reg_dummy; + register_info_type *reg_info_dummy; + +#ifdef DEBUG + /* Counts the total number of registers pushed. */ + unsigned num_regs_pushed = 0; +#endif + + DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); + + INIT_FAIL_STACK (); + + /* Do not bother to initialize all the register variables if there are + no groups in the pattern, as it takes a fair amount of time. If + there are groups, we include space for register 0 (the whole + pattern), even though we never use it, since it simplifies the + array indexing. We should fix this. */ + if (bufp->re_nsub) + { + regstart = REGEX_TALLOC (num_regs, const char *); + regend = REGEX_TALLOC (num_regs, const char *); + old_regstart = REGEX_TALLOC (num_regs, const char *); + old_regend = REGEX_TALLOC (num_regs, const char *); + best_regstart = REGEX_TALLOC (num_regs, const char *); + best_regend = REGEX_TALLOC (num_regs, const char *); + reg_info = REGEX_TALLOC (num_regs, register_info_type); + reg_dummy = REGEX_TALLOC (num_regs, const char *); + reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); + + if (!(regstart && regend && old_regstart && old_regend && reg_info + && best_regstart && best_regend && reg_dummy && reg_info_dummy)) + { + FREE_VARIABLES (); + return -2; + } + } +#ifdef REGEX_MALLOC + else + { + /* We must initialize all our variables to NULL, so that + `FREE_VARIABLES' doesn't try to free them. */ + regstart = regend = old_regstart = old_regend = best_regstart + = best_regend = reg_dummy = NULL; + reg_info = reg_info_dummy = (register_info_type *) NULL; + } +#endif /* REGEX_MALLOC */ + + /* The starting position is bogus. */ + if (pos < 0 || pos > size1 + size2) + { + FREE_VARIABLES (); + return -1; + } + + /* Initialize subexpression text positions to -1 to mark ones that no + start_memory/stop_memory has been seen for. Also initialize the + register information struct. */ + for (mcnt = 1; mcnt < num_regs; mcnt++) + { + regstart[mcnt] = regend[mcnt] + = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; + + REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; + IS_ACTIVE (reg_info[mcnt]) = 0; + MATCHED_SOMETHING (reg_info[mcnt]) = 0; + EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; + } + + /* We move `string1' into `string2' if the latter's empty -- but not if + `string1' is null. */ + if (size2 == 0 && string1 != NULL) + { + string2 = string1; + size2 = size1; + string1 = 0; + size1 = 0; + } + end1 = string1 + size1; + end2 = string2 + size2; + + /* Compute where to stop matching, within the two strings. */ + if (stop <= size1) + { + end_match_1 = string1 + stop; + end_match_2 = string2; + } + else + { + end_match_1 = end1; + end_match_2 = string2 + stop - size1; + } + + /* `p' scans through the pattern as `d' scans through the data. + `dend' is the end of the input string that `d' points within. `d' + is advanced into the following input string whenever necessary, but + this happens before fetching; therefore, at the beginning of the + loop, `d' can be pointing at the end of a string, but it cannot + equal `string2'. */ + if (size1 > 0 && pos <= size1) + { + d = string1 + pos; + dend = end_match_1; + } + else + { + d = string2 + pos - size1; + dend = end_match_2; + } + + DEBUG_PRINT1 ("The compiled pattern is: "); + DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); + DEBUG_PRINT1 ("The string to match is: `"); + DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); + DEBUG_PRINT1 ("'\n"); + + /* This loops over pattern commands. It exits by returning from the + function if the match is complete, or it drops through if the match + fails at this starting point in the input data. */ + for (;;) + { + DEBUG_PRINT2 ("\n0x%x: ", p); + + if (p == pend) + { /* End of pattern means we might have succeeded. */ + DEBUG_PRINT1 ("end of pattern ... "); + + /* If we haven't matched the entire string, and we want the + longest match, try backtracking. */ + if (d != end_match_2) + { + DEBUG_PRINT1 ("backtracking.\n"); + + if (!FAIL_STACK_EMPTY ()) + { /* More failure points to try. */ + boolean same_str_p = (FIRST_STRING_P (match_end) + == MATCHING_IN_FIRST_STRING); + + /* If exceeds best match so far, save it. */ + if (!best_regs_set + || (same_str_p && d > match_end) + || (!same_str_p && !MATCHING_IN_FIRST_STRING)) + { + best_regs_set = true; + match_end = d; + + DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); + + for (mcnt = 1; mcnt < num_regs; mcnt++) + { + best_regstart[mcnt] = regstart[mcnt]; + best_regend[mcnt] = regend[mcnt]; + } + } + goto fail; + } + + /* If no failure points, don't restore garbage. */ + else if (best_regs_set) + { + restore_best_regs: + /* Restore best match. It may happen that `dend == + end_match_1' while the restored d is in string2. + For example, the pattern `x.*y.*z' against the + strings `x-' and `y-z-', if the two strings are + not consecutive in memory. */ + DEBUG_PRINT1 ("Restoring best registers.\n"); + + d = match_end; + dend = ((d >= string1 && d <= end1) + ? end_match_1 : end_match_2); + + for (mcnt = 1; mcnt < num_regs; mcnt++) + { + regstart[mcnt] = best_regstart[mcnt]; + regend[mcnt] = best_regend[mcnt]; + } + } + } /* d != end_match_2 */ + + DEBUG_PRINT1 ("Accepting match.\n"); + + /* If caller wants register contents data back, do it. */ + if (regs && !bufp->no_sub) + { + /* Have the register data arrays been allocated? */ + if (bufp->regs_allocated == REGS_UNALLOCATED) + { /* No. So allocate them with malloc. We need one + extra element beyond `num_regs' for the `-1' marker + GNU code uses. */ + regs->num_regs = MAX (RE_NREGS, num_regs + 1); + regs->start = TALLOC (regs->num_regs, regoff_t); + regs->end = TALLOC (regs->num_regs, regoff_t); + if (regs->start == NULL || regs->end == NULL) + return -2; + bufp->regs_allocated = REGS_REALLOCATE; + } + else if (bufp->regs_allocated == REGS_REALLOCATE) + { /* Yes. If we need more elements than were already + allocated, reallocate them. If we need fewer, just + leave it alone. */ + if (regs->num_regs < num_regs + 1) + { + regs->num_regs = num_regs + 1; + RETALLOC (regs->start, regs->num_regs, regoff_t); + RETALLOC (regs->end, regs->num_regs, regoff_t); + if (regs->start == NULL || regs->end == NULL) + return -2; + } + } + else + assert (bufp->regs_allocated == REGS_FIXED); + + /* Convert the pointer data in `regstart' and `regend' to + indices. Register zero has to be set differently, + since we haven't kept track of any info for it. */ + if (regs->num_regs > 0) + { + regs->start[0] = pos; + regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1 + : d - string2 + size1); + } + + /* Go through the first `min (num_regs, regs->num_regs)' + registers, since that is all we initialized. */ + for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) + { + if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) + regs->start[mcnt] = regs->end[mcnt] = -1; + else + { + regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]); + regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]); + } + } + + /* If the regs structure we return has more elements than + were in the pattern, set the extra elements to -1. If + we (re)allocated the registers, this is the case, + because we always allocate enough to have at least one + -1 at the end. */ + for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) + regs->start[mcnt] = regs->end[mcnt] = -1; + } /* regs && !bufp->no_sub */ + + FREE_VARIABLES (); + DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", + nfailure_points_pushed, nfailure_points_popped, + nfailure_points_pushed - nfailure_points_popped); + DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); + + mcnt = d - pos - (MATCHING_IN_FIRST_STRING + ? string1 + : string2 - size1); + + DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); + + return mcnt; + } + + /* Otherwise match next pattern command. */ +#ifdef SWITCH_ENUM_BUG + switch ((int) ((re_opcode_t) *p++)) +#else + switch ((re_opcode_t) *p++) +#endif + { + /* Ignore these. Used to ignore the n of succeed_n's which + currently have n == 0. */ + case no_op: + DEBUG_PRINT1 ("EXECUTING no_op.\n"); + break; + + + /* Match the next n pattern characters exactly. The following + byte in the pattern defines n, and the n bytes after that + are the characters to match. */ + case exactn: + mcnt = *p++; + DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); + + /* This is written out as an if-else so we don't waste time + testing `translate' inside the loop. */ + if (translate) + { + do + { + PREFETCH (); + if (translate[(unsigned char) *d++] != (char) *p++) + goto fail; + } + while (--mcnt); + } + else + { + do + { + PREFETCH (); + if (*d++ != (char) *p++) goto fail; + } + while (--mcnt); + } + SET_REGS_MATCHED (); + break; + + + /* Match any character except possibly a newline or a null. */ + case anychar: + DEBUG_PRINT1 ("EXECUTING anychar.\n"); + + PREFETCH (); + + if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') + || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) + goto fail; + + SET_REGS_MATCHED (); + DEBUG_PRINT2 (" Matched `%d'.\n", *d); + d++; + break; + + + case charset: + case charset_not: + { + register unsigned char c; + boolean not = (re_opcode_t) *(p - 1) == charset_not; + + DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); + + PREFETCH (); + c = TRANSLATE (*d); /* The character to match. */ + + /* Cast to `unsigned' instead of `unsigned char' in case the + bit list is a full 32 bytes long. */ + if (c < (unsigned) (*p * BYTEWIDTH) + && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) + not = !not; + + p += 1 + *p; + + if (!not) goto fail; + + SET_REGS_MATCHED (); + d++; + break; + } + + + /* The beginning of a group is represented by start_memory. + The arguments are the register number in the next byte, and the + number of groups inner to this one in the next. The text + matched within the group is recorded (in the internal + registers data structure) under the register number. */ + case start_memory: + DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); + + /* Find out if this group can match the empty string. */ + p1 = p; /* To send to group_match_null_string_p. */ + + if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) + REG_MATCH_NULL_STRING_P (reg_info[*p]) + = group_match_null_string_p (&p1, pend, reg_info); + + /* Save the position in the string where we were the last time + we were at this open-group operator in case the group is + operated upon by a repetition operator, e.g., with `(a*)*b' + against `ab'; then we want to ignore where we are now in + the string in case this attempt to match fails. */ + old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) + ? REG_UNSET (regstart[*p]) ? d : regstart[*p] + : regstart[*p]; + DEBUG_PRINT2 (" old_regstart: %d\n", + POINTER_TO_OFFSET (old_regstart[*p])); + + regstart[*p] = d; + DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); + + IS_ACTIVE (reg_info[*p]) = 1; + MATCHED_SOMETHING (reg_info[*p]) = 0; + + /* This is the new highest active register. */ + highest_active_reg = *p; + + /* If nothing was active before, this is the new lowest active + register. */ + if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) + lowest_active_reg = *p; + + /* Move past the register number and inner group count. */ + p += 2; + break; + + + /* The stop_memory opcode represents the end of a group. Its + arguments are the same as start_memory's: the register + number, and the number of inner groups. */ + case stop_memory: + DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); + + /* We need to save the string position the last time we were at + this close-group operator in case the group is operated + upon by a repetition operator, e.g., with `((a*)*(b*)*)*' + against `aba'; then we want to ignore where we are now in + the string in case this attempt to match fails. */ + old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) + ? REG_UNSET (regend[*p]) ? d : regend[*p] + : regend[*p]; + DEBUG_PRINT2 (" old_regend: %d\n", + POINTER_TO_OFFSET (old_regend[*p])); + + regend[*p] = d; + DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); + + /* This register isn't active anymore. */ + IS_ACTIVE (reg_info[*p]) = 0; + + /* If this was the only register active, nothing is active + anymore. */ + if (lowest_active_reg == highest_active_reg) + { + lowest_active_reg = NO_LOWEST_ACTIVE_REG; + highest_active_reg = NO_HIGHEST_ACTIVE_REG; + } + else + { /* We must scan for the new highest active register, since + it isn't necessarily one less than now: consider + (a(b)c(d(e)f)g). When group 3 ends, after the f), the + new highest active register is 1. */ + unsigned char r = *p - 1; + while (r > 0 && !IS_ACTIVE (reg_info[r])) + r--; + + /* If we end up at register zero, that means that we saved + the registers as the result of an `on_failure_jump', not + a `start_memory', and we jumped to past the innermost + `stop_memory'. For example, in ((.)*) we save + registers 1 and 2 as a result of the *, but when we pop + back to the second ), we are at the stop_memory 1. + Thus, nothing is active. */ + if (r == 0) + { + lowest_active_reg = NO_LOWEST_ACTIVE_REG; + highest_active_reg = NO_HIGHEST_ACTIVE_REG; + } + else + highest_active_reg = r; + } + + /* If just failed to match something this time around with a + group that's operated on by a repetition operator, try to + force exit from the ``loop'', and restore the register + information for this group that we had before trying this + last match. */ + if ((!MATCHED_SOMETHING (reg_info[*p]) + || (re_opcode_t) p[-3] == start_memory) + && (p + 2) < pend) + { + boolean is_a_jump_n = false; + + p1 = p + 2; + mcnt = 0; + switch ((re_opcode_t) *p1++) + { + case jump_n: + is_a_jump_n = true; + case pop_failure_jump: + case maybe_pop_jump: + case jump: + case dummy_failure_jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + if (is_a_jump_n) + p1 += 2; + break; + + default: + /* do nothing */ ; + } + p1 += mcnt; + + /* If the next operation is a jump backwards in the pattern + to an on_failure_jump right before the start_memory + corresponding to this stop_memory, exit from the loop + by forcing a failure after pushing on the stack the + on_failure_jump's jump in the pattern, and d. */ + if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump + && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) + { + /* If this group ever matched anything, then restore + what its registers were before trying this last + failed match, e.g., with `(a*)*b' against `ab' for + regstart[1], and, e.g., with `((a*)*(b*)*)*' + against `aba' for regend[3]. + + Also restore the registers for inner groups for, + e.g., `((a*)(b*))*' against `aba' (register 3 would + otherwise get trashed). */ + + if (EVER_MATCHED_SOMETHING (reg_info[*p])) + { + unsigned r; + + EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; + + /* Restore this and inner groups' (if any) registers. */ + for (r = *p; r < *p + *(p + 1); r++) + { + regstart[r] = old_regstart[r]; + + /* xx why this test? */ + if ((int) old_regend[r] >= (int) regstart[r]) + regend[r] = old_regend[r]; + } + } + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + PUSH_FAILURE_POINT (p1 + mcnt, d, -2); + + goto fail; + } + } + + /* Move past the register number and the inner group count. */ + p += 2; + break; + + + /* \ has been turned into a `duplicate' command which is + followed by the numeric value of as the register number. */ + case duplicate: + { + register const char *d2, *dend2; + int regno = *p++; /* Get which register to match against. */ + DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); + + /* Can't back reference a group which we've never matched. */ + if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) + goto fail; + + /* Where in input to try to start matching. */ + d2 = regstart[regno]; + + /* Where to stop matching; if both the place to start and + the place to stop matching are in the same string, then + set to the place to stop, otherwise, for now have to use + the end of the first string. */ + + dend2 = ((FIRST_STRING_P (regstart[regno]) + == FIRST_STRING_P (regend[regno])) + ? regend[regno] : end_match_1); + for (;;) + { + /* If necessary, advance to next segment in register + contents. */ + while (d2 == dend2) + { + if (dend2 == end_match_2) break; + if (dend2 == regend[regno]) break; + + /* End of string1 => advance to string2. */ + d2 = string2; + dend2 = regend[regno]; + } + /* At end of register contents => success */ + if (d2 == dend2) break; + + /* If necessary, advance to next segment in data. */ + PREFETCH (); + + /* How many characters left in this segment to match. */ + mcnt = dend - d; + + /* Want how many consecutive characters we can match in + one shot, so, if necessary, adjust the count. */ + if (mcnt > dend2 - d2) + mcnt = dend2 - d2; + + /* Compare that many; failure if mismatch, else move + past them. */ + if (translate + ? bcmp_translate (d, d2, mcnt, translate) + : bcmp (d, d2, mcnt)) + goto fail; + d += mcnt, d2 += mcnt; + } + } + break; + + + /* begline matches the empty string at the beginning of the string + (unless `not_bol' is set in `bufp'), and, if + `newline_anchor' is set, after newlines. */ + case begline: + DEBUG_PRINT1 ("EXECUTING begline.\n"); + + if (AT_STRINGS_BEG (d)) + { + if (!bufp->not_bol) break; + } + else if (d[-1] == '\n' && bufp->newline_anchor) + { + break; + } + /* In all other cases, we fail. */ + goto fail; + + + /* endline is the dual of begline. */ + case endline: + DEBUG_PRINT1 ("EXECUTING endline.\n"); + + if (AT_STRINGS_END (d)) + { + if (!bufp->not_eol) break; + } + + /* We have to ``prefetch'' the next character. */ + else if ((d == end1 ? *string2 : *d) == '\n' + && bufp->newline_anchor) + { + break; + } + goto fail; + + + /* Match at the very beginning of the data. */ + case begbuf: + DEBUG_PRINT1 ("EXECUTING begbuf.\n"); + if (AT_STRINGS_BEG (d)) + break; + goto fail; + + + /* Match at the very end of the data. */ + case endbuf: + DEBUG_PRINT1 ("EXECUTING endbuf.\n"); + if (AT_STRINGS_END (d)) + break; + goto fail; + + + /* on_failure_keep_string_jump is used to optimize `.*\n'. It + pushes NULL as the value for the string on the stack. Then + `pop_failure_point' will keep the current value for the + string, instead of restoring it. To see why, consider + matching `foo\nbar' against `.*\n'. The .* matches the foo; + then the . fails against the \n. But the next thing we want + to do is match the \n against the \n; if we restored the + string value, we would be back at the foo. + + Because this is used only in specific cases, we don't need to + check all the things that `on_failure_jump' does, to make + sure the right things get saved on the stack. Hence we don't + share its code. The only reason to push anything on the + stack at all is that otherwise we would have to change + `anychar's code to do something besides goto fail in this + case; that seems worse than this. */ + case on_failure_keep_string_jump: + DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); + + EXTRACT_NUMBER_AND_INCR (mcnt, p); + DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); + + PUSH_FAILURE_POINT (p + mcnt, NULL, -2); + break; + + + /* Uses of on_failure_jump: + + Each alternative starts with an on_failure_jump that points + to the beginning of the next alternative. Each alternative + except the last ends with a jump that in effect jumps past + the rest of the alternatives. (They really jump to the + ending jump of the following alternative, because tensioning + these jumps is a hassle.) + + Repeats start with an on_failure_jump that points past both + the repetition text and either the following jump or + pop_failure_jump back to this on_failure_jump. */ + case on_failure_jump: + on_failure: + DEBUG_PRINT1 ("EXECUTING on_failure_jump"); + + EXTRACT_NUMBER_AND_INCR (mcnt, p); + DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); + + /* If this on_failure_jump comes right before a group (i.e., + the original * applied to a group), save the information + for that group and all inner ones, so that if we fail back + to this point, the group's information will be correct. + For example, in \(a*\)*\1, we need the preceding group, + and in \(\(a*\)b*\)\2, we need the inner group. */ + + /* We can't use `p' to check ahead because we push + a failure point to `p + mcnt' after we do this. */ + p1 = p; + + /* We need to skip no_op's before we look for the + start_memory in case this on_failure_jump is happening as + the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 + against aba. */ + while (p1 < pend && (re_opcode_t) *p1 == no_op) + p1++; + + if (p1 < pend && (re_opcode_t) *p1 == start_memory) + { + /* We have a new highest active register now. This will + get reset at the start_memory we are about to get to, + but we will have saved all the registers relevant to + this repetition op, as described above. */ + highest_active_reg = *(p1 + 1) + *(p1 + 2); + if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) + lowest_active_reg = *(p1 + 1); + } + + DEBUG_PRINT1 (":\n"); + PUSH_FAILURE_POINT (p + mcnt, d, -2); + break; + + + /* A smart repeat ends with `maybe_pop_jump'. + We change it to either `pop_failure_jump' or `jump'. */ + case maybe_pop_jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p); + DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); + { + register unsigned char *p2 = p; + + /* Compare the beginning of the repeat with what in the + pattern follows its end. If we can establish that there + is nothing that they would both match, i.e., that we + would have to backtrack because of (as in, e.g., `a*a') + then we can change to pop_failure_jump, because we'll + never have to backtrack. + + This is not true in the case of alternatives: in + `(a|ab)*' we do need to backtrack to the `ab' alternative + (e.g., if the string was `ab'). But instead of trying to + detect that here, the alternative has put on a dummy + failure point which is what we will end up popping. */ + + /* Skip over open/close-group commands. */ + while (p2 + 2 < pend + && ((re_opcode_t) *p2 == stop_memory + || (re_opcode_t) *p2 == start_memory)) + p2 += 3; /* Skip over args, too. */ + + /* If we're at the end of the pattern, we can change. */ + if (p2 == pend) + { + /* Consider what happens when matching ":\(.*\)" + against ":/". I don't really understand this code + yet. */ + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 + (" End of pattern: change to `pop_failure_jump'.\n"); + } + + else if ((re_opcode_t) *p2 == exactn + || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) + { + register unsigned char c + = *p2 == (unsigned char) endline ? '\n' : p2[2]; + p1 = p + mcnt; + + /* p1[0] ... p1[2] are the `on_failure_jump' corresponding + to the `maybe_finalize_jump' of this case. Examine what + follows. */ + if ((re_opcode_t) p1[3] == exactn && p1[5] != c) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", + c, p1[5]); + } + + else if ((re_opcode_t) p1[3] == charset + || (re_opcode_t) p1[3] == charset_not) + { + int not = (re_opcode_t) p1[3] == charset_not; + + if (c < (unsigned char) (p1[4] * BYTEWIDTH) + && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) + not = !not; + + /* `not' is equal to 1 if c would match, which means + that we can't change to pop_failure_jump. */ + if (!not) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); + } + } + } + } + p -= 2; /* Point at relative address again. */ + if ((re_opcode_t) p[-1] != pop_failure_jump) + { + p[-1] = (unsigned char) jump; + DEBUG_PRINT1 (" Match => jump.\n"); + goto unconditional_jump; + } + /* Note fall through. */ + + + /* The end of a simple repeat has a pop_failure_jump back to + its matching on_failure_jump, where the latter will push a + failure point. The pop_failure_jump takes off failure + points put on by this pop_failure_jump's matching + on_failure_jump; we got through the pattern to here from the + matching on_failure_jump, so didn't fail. */ + case pop_failure_jump: + { + /* We need to pass separate storage for the lowest and + highest registers, even though we don't care about the + actual values. Otherwise, we will restore only one + register from the stack, since lowest will == highest in + `pop_failure_point'. */ + unsigned dummy_low_reg, dummy_high_reg; + unsigned char *pdummy; + const char *sdummy; + + DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); + POP_FAILURE_POINT (sdummy, pdummy, + dummy_low_reg, dummy_high_reg, + reg_dummy, reg_dummy, reg_info_dummy); + } + /* Note fall through. */ + + + /* Unconditionally jump (without popping any failure points). */ + case jump: + unconditional_jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ + DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); + p += mcnt; /* Do the jump. */ + DEBUG_PRINT2 ("(to 0x%x).\n", p); + break; + + + /* We need this opcode so we can detect where alternatives end + in `group_match_null_string_p' et al. */ + case jump_past_alt: + DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); + goto unconditional_jump; + + + /* Normally, the on_failure_jump pushes a failure point, which + then gets popped at pop_failure_jump. We will end up at + pop_failure_jump, also, and with a pattern of, say, `a+', we + are skipping over the on_failure_jump, so we have to push + something meaningless for pop_failure_jump to pop. */ + case dummy_failure_jump: + DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); + /* It doesn't matter what we push for the string here. What + the code at `fail' tests is the value for the pattern. */ + PUSH_FAILURE_POINT (0, 0, -2); + goto unconditional_jump; + + + /* At the end of an alternative, we need to push a dummy failure + point in case we are followed by a `pop_failure_jump', because + we don't want the failure point for the alternative to be + popped. For example, matching `(a|ab)*' against `aab' + requires that we match the `ab' alternative. */ + case push_dummy_failure: + DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); + /* See comments just above at `dummy_failure_jump' about the + two zeroes. */ + PUSH_FAILURE_POINT (0, 0, -2); + break; + + /* Have to succeed matching what follows at least n times. + After that, handle like `on_failure_jump'. */ + case succeed_n: + EXTRACT_NUMBER (mcnt, p + 2); + DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); + + assert (mcnt >= 0); + /* Originally, this is how many times we HAVE to succeed. */ + if (mcnt > 0) + { + mcnt--; + p += 2; + STORE_NUMBER_AND_INCR (p, mcnt); + DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt); + } + else if (mcnt == 0) + { + DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); + p[2] = (unsigned char) no_op; + p[3] = (unsigned char) no_op; + goto on_failure; + } + break; + + case jump_n: + EXTRACT_NUMBER (mcnt, p + 2); + DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); + + /* Originally, this is how many times we CAN jump. */ + if (mcnt) + { + mcnt--; + STORE_NUMBER (p + 2, mcnt); + goto unconditional_jump; + } + /* If don't have to jump any more, skip over the rest of command. */ + else + p += 4; + break; + + case set_number_at: + { + DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); + + EXTRACT_NUMBER_AND_INCR (mcnt, p); + p1 = p + mcnt; + EXTRACT_NUMBER_AND_INCR (mcnt, p); + DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); + STORE_NUMBER (p1, mcnt); + break; + } + + case wordbound: + DEBUG_PRINT1 ("EXECUTING wordbound.\n"); + if (AT_WORD_BOUNDARY (d)) + break; + goto fail; + + case notwordbound: + DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); + if (AT_WORD_BOUNDARY (d)) + goto fail; + break; + + case wordbeg: + DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); + if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) + break; + goto fail; + + case wordend: + DEBUG_PRINT1 ("EXECUTING wordend.\n"); + if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) + && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) + break; + goto fail; + +#ifdef emacs +#ifdef emacs19 + case before_dot: + DEBUG_PRINT1 ("EXECUTING before_dot.\n"); + if (PTR_CHAR_POS ((unsigned char *) d) >= point) + goto fail; + break; + + case at_dot: + DEBUG_PRINT1 ("EXECUTING at_dot.\n"); + if (PTR_CHAR_POS ((unsigned char *) d) != point) + goto fail; + break; + + case after_dot: + DEBUG_PRINT1 ("EXECUTING after_dot.\n"); + if (PTR_CHAR_POS ((unsigned char *) d) <= point) + goto fail; + break; +#else /* not emacs19 */ + case at_dot: + DEBUG_PRINT1 ("EXECUTING at_dot.\n"); + if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point) + goto fail; + break; +#endif /* not emacs19 */ + + case syntaxspec: + DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); + mcnt = *p++; + goto matchsyntax; + + case wordchar: + DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); + mcnt = (int) Sword; + matchsyntax: + PREFETCH (); + if (SYNTAX (*d++) != (enum syntaxcode) mcnt) + goto fail; + SET_REGS_MATCHED (); + break; + + case notsyntaxspec: + DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); + mcnt = *p++; + goto matchnotsyntax; + + case notwordchar: + DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); + mcnt = (int) Sword; + matchnotsyntax: + PREFETCH (); + if (SYNTAX (*d++) == (enum syntaxcode) mcnt) + goto fail; + SET_REGS_MATCHED (); + break; + +#else /* not emacs */ + case wordchar: + DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); + PREFETCH (); + if (!WORDCHAR_P (d)) + goto fail; + SET_REGS_MATCHED (); + d++; + break; + + case notwordchar: + DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); + PREFETCH (); + if (WORDCHAR_P (d)) + goto fail; + SET_REGS_MATCHED (); + d++; + break; +#endif /* not emacs */ + + default: + abort (); + } + continue; /* Successfully executed one pattern command; keep going. */ + + + /* We goto here if a matching operation fails. */ + fail: + if (!FAIL_STACK_EMPTY ()) + { /* A restart point is known. Restore to that state. */ + DEBUG_PRINT1 ("\nFAIL:\n"); + POP_FAILURE_POINT (d, p, + lowest_active_reg, highest_active_reg, + regstart, regend, reg_info); + + /* If this failure point is a dummy, try the next one. */ + if (!p) + goto fail; + + /* If we failed to the end of the pattern, don't examine *p. */ + assert (p <= pend); + if (p < pend) + { + boolean is_a_jump_n = false; + + /* If failed to a backwards jump that's part of a repetition + loop, need to pop this failure point and use the next one. */ + switch ((re_opcode_t) *p) + { + case jump_n: + is_a_jump_n = true; + case maybe_pop_jump: + case pop_failure_jump: + case jump: + p1 = p + 1; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + p1 += mcnt; + + if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) + || (!is_a_jump_n + && (re_opcode_t) *p1 == on_failure_jump)) + goto fail; + break; + default: + /* do nothing */ ; + } + } + + if (d >= string1 && d <= end1) + dend = end_match_1; + } + else + break; /* Matching at this starting point really fails. */ + } /* for (;;) */ + + if (best_regs_set) + goto restore_best_regs; + + FREE_VARIABLES (); + + return -1; /* Failure to match. */ +} /* re_match_2 */ + +/* Subroutine definitions for re_match_2. */ + + +/* We are passed P pointing to a register number after a start_memory. + + Return true if the pattern up to the corresponding stop_memory can + match the empty string, and false otherwise. + + If we find the matching stop_memory, sets P to point to one past its number. + Otherwise, sets P to an undefined byte less than or equal to END. + + We don't handle duplicates properly (yet). */ + +static boolean +group_match_null_string_p (p, end, reg_info) + unsigned char **p, *end; + register_info_type *reg_info; +{ + int mcnt; + /* Point to after the args to the start_memory. */ + unsigned char *p1 = *p + 2; + + while (p1 < end) + { + /* Skip over opcodes that can match nothing, and return true or + false, as appropriate, when we get to one that can't, or to the + matching stop_memory. */ + + switch ((re_opcode_t) *p1) + { + /* Could be either a loop or a series of alternatives. */ + case on_failure_jump: + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + + /* If the next operation is not a jump backwards in the + pattern. */ + + if (mcnt >= 0) + { + /* Go through the on_failure_jumps of the alternatives, + seeing if any of the alternatives cannot match nothing. + The last alternative starts with only a jump, + whereas the rest start with on_failure_jump and end + with a jump, e.g., here is the pattern for `a|b|c': + + /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 + /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 + /exactn/1/c + + So, we have to first go through the first (n-1) + alternatives and then deal with the last one separately. */ + + + /* Deal with the first (n-1) alternatives, which start + with an on_failure_jump (see above) that jumps to right + past a jump_past_alt. */ + + while ((re_opcode_t) p1[mcnt-3] == jump_past_alt) + { + /* `mcnt' holds how many bytes long the alternative + is, including the ending `jump_past_alt' and + its number. */ + + if (!alt_match_null_string_p (p1, p1 + mcnt - 3, + reg_info)) + return false; + + /* Move to right after this alternative, including the + jump_past_alt. */ + p1 += mcnt; + + /* Break if it's the beginning of an n-th alternative + that doesn't begin with an on_failure_jump. */ + if ((re_opcode_t) *p1 != on_failure_jump) + break; + + /* Still have to check that it's not an n-th + alternative that starts with an on_failure_jump. */ + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) + { + /* Get to the beginning of the n-th alternative. */ + p1 -= 3; + break; + } + } + + /* Deal with the last alternative: go back and get number + of the `jump_past_alt' just before it. `mcnt' contains + the length of the alternative. */ + EXTRACT_NUMBER (mcnt, p1 - 2); + + if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info)) + return false; + + p1 += mcnt; /* Get past the n-th alternative. */ + } /* if mcnt > 0 */ + break; + + + case stop_memory: + assert (p1[1] == **p); + *p = p1 + 2; + return true; + + + default: + if (!common_op_match_null_string_p (&p1, end, reg_info)) + return false; + } + } /* while p1 < end */ + + return false; +} /* group_match_null_string_p */ + + +/* Similar to group_match_null_string_p, but doesn't deal with alternatives: + It expects P to be the first byte of a single alternative and END one + byte past the last. The alternative can contain groups. */ + +static boolean +alt_match_null_string_p (p, end, reg_info) + unsigned char *p, *end; + register_info_type *reg_info; +{ + int mcnt; + unsigned char *p1 = p; + + while (p1 < end) + { + /* Skip over opcodes that can match nothing, and break when we get + to one that can't. */ + + switch ((re_opcode_t) *p1) + { + /* It's a loop. */ + case on_failure_jump: + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + p1 += mcnt; + break; + + default: + if (!common_op_match_null_string_p (&p1, end, reg_info)) + return false; + } + } /* while p1 < end */ + + return true; +} /* alt_match_null_string_p */ + + +/* Deals with the ops common to group_match_null_string_p and + alt_match_null_string_p. + + Sets P to one after the op and its arguments, if any. */ + +static boolean +common_op_match_null_string_p (p, end, reg_info) + unsigned char **p, *end; + register_info_type *reg_info; +{ + int mcnt; + boolean ret; + int reg_no; + unsigned char *p1 = *p; + + switch ((re_opcode_t) *p1++) + { + case no_op: + case begline: + case endline: + case begbuf: + case endbuf: + case wordbeg: + case wordend: + case wordbound: + case notwordbound: +#ifdef emacs + case before_dot: + case at_dot: + case after_dot: +#endif + break; + + case start_memory: + reg_no = *p1; + assert (reg_no > 0 && reg_no <= MAX_REGNUM); + ret = group_match_null_string_p (&p1, end, reg_info); + + /* Have to set this here in case we're checking a group which + contains a group and a back reference to it. */ + + if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) + REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; + + if (!ret) + return false; + break; + + /* If this is an optimized succeed_n for zero times, make the jump. */ + case jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + if (mcnt >= 0) + p1 += mcnt; + else + return false; + break; + + case succeed_n: + /* Get to the number of times to succeed. */ + p1 += 2; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + + if (mcnt == 0) + { + p1 -= 4; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + p1 += mcnt; + } + else + return false; + break; + + case duplicate: + if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) + return false; + break; + + case set_number_at: + p1 += 4; + + default: + /* All other opcodes mean we cannot match the empty string. */ + return false; + } + + *p = p1; + return true; +} /* common_op_match_null_string_p */ + + +/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN + bytes; nonzero otherwise. */ + +static int +bcmp_translate (s1, s2, len, translate) + unsigned char *s1, *s2; + register int len; + char *translate; +{ + register unsigned char *p1 = s1, *p2 = s2; + while (len) + { + if (translate[*p1++] != translate[*p2++]) return 1; + len--; + } + return 0; +} + +/* Entry points for GNU code. */ + +/* re_compile_pattern is the GNU regular expression compiler: it + compiles PATTERN (of length SIZE) and puts the result in BUFP. + Returns 0 if the pattern was valid, otherwise an error string. + + Assumes the `allocated' (and perhaps `buffer') and `translate' fields + are set in BUFP on entry. + + We call regex_compile to do the actual compilation. */ + +const char * +re_compile_pattern (pattern, length, bufp) + const char *pattern; + int length; + struct re_pattern_buffer *bufp; +{ + reg_errcode_t ret; + + /* GNU code is written to assume at least RE_NREGS registers will be set + (and at least one extra will be -1). */ + bufp->regs_allocated = REGS_UNALLOCATED; + + /* And GNU code determines whether or not to get register information + by passing null for the REGS argument to re_match, etc., not by + setting no_sub. */ + bufp->no_sub = 0; + + /* Match anchors at newline. */ + bufp->newline_anchor = 1; + + ret = regex_compile (pattern, length, re_syntax_options, bufp); + + return re_error_msg[(int) ret]; +} + +/* Entry points compatible with 4.2 BSD regex library. We don't define + them if this is an Emacs or POSIX compilation. */ + +#if !defined (emacs) && !defined (_POSIX_SOURCE) + +/* BSD has one and only one pattern buffer. */ +static struct re_pattern_buffer re_comp_buf; + +char * +re_comp (s) + const char *s; +{ + reg_errcode_t ret; + + if (!s) + { + if (!re_comp_buf.buffer) + return "No previous regular expression"; + return 0; + } + + if (!re_comp_buf.buffer) + { + re_comp_buf.buffer = (unsigned char *) malloc (200); + if (re_comp_buf.buffer == NULL) + return "Memory exhausted"; + re_comp_buf.allocated = 200; + + re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); + if (re_comp_buf.fastmap == NULL) + return "Memory exhausted"; + } + + /* Since `re_exec' always passes NULL for the `regs' argument, we + don't need to initialize the pattern buffer fields which affect it. */ + + /* Match anchors at newlines. */ + re_comp_buf.newline_anchor = 1; + + ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); + + /* Yes, we're discarding `const' here. */ + return (char *) re_error_msg[(int) ret]; +} + + +int +re_exec (s) + const char *s; +{ + const int len = strlen (s); + return + 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); +} +#endif /* not emacs and not _POSIX_SOURCE */ + +/* POSIX.2 functions. Don't define these for Emacs. */ + +#ifndef emacs + +/* regcomp takes a regular expression as a string and compiles it. + + PREG is a regex_t *. We do not expect any fields to be initialized, + since POSIX says we shouldn't. Thus, we set + + `buffer' to the compiled pattern; + `used' to the length of the compiled pattern; + `syntax' to RE_SYNTAX_POSIX_EXTENDED if the + REG_EXTENDED bit in CFLAGS is set; otherwise, to + RE_SYNTAX_POSIX_BASIC; + `newline_anchor' to REG_NEWLINE being set in CFLAGS; + `fastmap' and `fastmap_accurate' to zero; + `re_nsub' to the number of subexpressions in PATTERN. + + PATTERN is the address of the pattern string. + + CFLAGS is a series of bits which affect compilation. + + If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we + use POSIX basic syntax. + + If REG_NEWLINE is set, then . and [^...] don't match newline. + Also, regexec will try a match beginning after every newline. + + If REG_ICASE is set, then we considers upper- and lowercase + versions of letters to be equivalent when matching. + + If REG_NOSUB is set, then when PREG is passed to regexec, that + routine will report only success or failure, and nothing about the + registers. + + It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for + the return codes and their meanings.) */ + +int +regcomp (preg, pattern, cflags) + regex_t *preg; + const char *pattern; + int cflags; +{ + reg_errcode_t ret; + unsigned syntax + = (cflags & REG_EXTENDED) ? + RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; + + /* regex_compile will allocate the space for the compiled pattern. */ + preg->buffer = 0; + preg->allocated = 0; + + /* Don't bother to use a fastmap when searching. This simplifies the + REG_NEWLINE case: if we used a fastmap, we'd have to put all the + characters after newlines into the fastmap. This way, we just try + every character. */ + preg->fastmap = 0; + + if (cflags & REG_ICASE) + { + unsigned i; + + preg->translate = (char *) malloc (CHAR_SET_SIZE); + if (preg->translate == NULL) + return (int) REG_ESPACE; + + /* Map uppercase characters to corresponding lowercase ones. */ + for (i = 0; i < CHAR_SET_SIZE; i++) + preg->translate[i] = ISUPPER (i) ? tolower (i) : i; + } + else + preg->translate = NULL; + + /* If REG_NEWLINE is set, newlines are treated differently. */ + if (cflags & REG_NEWLINE) + { /* REG_NEWLINE implies neither . nor [^...] match newline. */ + syntax &= ~RE_DOT_NEWLINE; + syntax |= RE_HAT_LISTS_NOT_NEWLINE; + /* It also changes the matching behavior. */ + preg->newline_anchor = 1; + } + else + preg->newline_anchor = 0; + + preg->no_sub = !!(cflags & REG_NOSUB); + + /* POSIX says a null character in the pattern terminates it, so we + can use strlen here in compiling the pattern. */ + ret = regex_compile (pattern, strlen (pattern), syntax, preg); + + /* POSIX doesn't distinguish between an unmatched open-group and an + unmatched close-group: both are REG_EPAREN. */ + if (ret == REG_ERPAREN) ret = REG_EPAREN; + + return (int) ret; +} + + +/* regexec searches for a given pattern, specified by PREG, in the + string STRING. + + If NMATCH is zero or REG_NOSUB was set in the cflags argument to + `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at + least NMATCH elements, and we set them to the offsets of the + corresponding matched substrings. + + EFLAGS specifies `execution flags' which affect matching: if + REG_NOTBOL is set, then ^ does not match at the beginning of the + string; if REG_NOTEOL is set, then $ does not match at the end. + + We return 0 if we find a match and REG_NOMATCH if not. */ + +int +regexec (preg, string, nmatch, pmatch, eflags) + const regex_t *preg; + const char *string; + size_t nmatch; + regmatch_t pmatch[]; + int eflags; +{ + int ret; + struct re_registers regs; + regex_t private_preg; + int len = strlen (string); + boolean want_reg_info = !preg->no_sub && nmatch > 0; + + private_preg = *preg; + + private_preg.not_bol = !!(eflags & REG_NOTBOL); + private_preg.not_eol = !!(eflags & REG_NOTEOL); + + /* The user has told us exactly how many registers to return + information about, via `nmatch'. We have to pass that on to the + matching routines. */ + private_preg.regs_allocated = REGS_FIXED; + + if (want_reg_info) + { + regs.num_regs = nmatch; + regs.start = TALLOC (nmatch, regoff_t); + regs.end = TALLOC (nmatch, regoff_t); + if (regs.start == NULL || regs.end == NULL) + return (int) REG_NOMATCH; + } + + /* Perform the searching operation. */ + ret = re_search (&private_preg, string, len, + /* start: */ 0, /* range: */ len, + want_reg_info ? ®s : (struct re_registers *) 0); + + /* Copy the register information to the POSIX structure. */ + if (want_reg_info) + { + if (ret >= 0) + { + unsigned r; + + for (r = 0; r < nmatch; r++) + { + pmatch[r].rm_so = regs.start[r]; + pmatch[r].rm_eo = regs.end[r]; + } + } + + /* If we needed the temporary register info, free the space now. */ + free (regs.start); + free (regs.end); + } + + /* We want zero return to mean success, unlike `re_search'. */ + return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; +} + + +/* Returns a message corresponding to an error code, ERRCODE, returned + from either regcomp or regexec. We don't use PREG here. */ + +size_t +regerror (errcode, preg, errbuf, errbuf_size) + int errcode; + const regex_t *preg; + char *errbuf; + size_t errbuf_size; +{ + const char *msg; + size_t msg_size; + + if (errcode < 0 + || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0]))) + /* Only error codes returned by the rest of the code should be passed + to this routine. If we are given anything else, or if other regex + code generates an invalid error code, then the program has a bug. + Dump core so we can fix it. */ + abort (); + + msg = re_error_msg[errcode]; + + /* POSIX doesn't require that we do anything in this case, but why + not be nice. */ + if (! msg) + msg = "Success"; + + msg_size = strlen (msg) + 1; /* Includes the null. */ + + if (errbuf_size != 0) + { + if (msg_size > errbuf_size) + { + strncpy (errbuf, msg, errbuf_size - 1); + errbuf[errbuf_size - 1] = 0; + } + else + strcpy (errbuf, msg); + } + + return msg_size; +} + + +/* Free dynamically allocated space used by PREG. */ + +void +regfree (preg) + regex_t *preg; +{ + if (preg->buffer != NULL) + free (preg->buffer); + preg->buffer = NULL; + + preg->allocated = 0; + preg->used = 0; + + if (preg->fastmap != NULL) + free (preg->fastmap); + preg->fastmap = NULL; + preg->fastmap_accurate = 0; + + if (preg->translate != NULL) + free (preg->translate); + preg->translate = NULL; +} + +#endif /* not emacs */ + +/* +Local variables: +make-backup-files: t +version-control: t +trim-versions-without-asking: nil +End: +*/ diff --git a/bin/ed/regex.h b/bin/ed/regex.h new file mode 100644 index 0000000000..408dd21034 --- /dev/null +++ b/bin/ed/regex.h @@ -0,0 +1,490 @@ +/* Definitions for data structures and routines for the regular + expression library, version 0.12. + + Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef __REGEXP_LIBRARY_H__ +#define __REGEXP_LIBRARY_H__ + +/* POSIX says that must be included (by the caller) before + . */ + +#ifdef VMS +/* VMS doesn't have `size_t' in , even though POSIX says it + should be there. */ +#include +#endif + + +/* The following bits are used to determine the regexp syntax we + recognize. The set/not-set meanings are chosen so that Emacs syntax + remains the value 0. The bits are given in alphabetical order, and + the definitions shifted by one from the previous bit; thus, when we + add or remove a bit, only one other definition need change. */ +typedef unsigned reg_syntax_t; + +/* If this bit is not set, then \ inside a bracket expression is literal. + If set, then such a \ quotes the following character. */ +#define RE_BACKSLASH_ESCAPE_IN_LISTS (1) + +/* If this bit is not set, then + and ? are operators, and \+ and \? are + literals. + If set, then \+ and \? are operators and + and ? are literals. */ +#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) + +/* If this bit is set, then character classes are supported. They are: + [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], + [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. + If not set, then character classes are not supported. */ +#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) + +/* If this bit is set, then ^ and $ are always anchors (outside bracket + expressions, of course). + If this bit is not set, then it depends: + ^ is an anchor if it is at the beginning of a regular + expression or after an open-group or an alternation operator; + $ is an anchor if it is at the end of a regular expression, or + before a close-group or an alternation operator. + + This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because + POSIX draft 11.2 says that * etc. in leading positions is undefined. + We already implemented a previous draft which made those constructs + invalid, though, so we haven't changed the code back. */ +#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) + +/* If this bit is set, then special characters are always special + regardless of where they are in the pattern. + If this bit is not set, then special characters are special only in + some contexts; otherwise they are ordinary. Specifically, + * + ? and intervals are only special when not after the beginning, + open-group, or alternation operator. */ +#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) + +/* If this bit is set, then *, +, ?, and { cannot be first in an re or + immediately after an alternation or begin-group operator. */ +#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) + +/* If this bit is set, then . matches newline. + If not set, then it doesn't. */ +#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) + +/* If this bit is set, then . doesn't match NUL. + If not set, then it does. */ +#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) + +/* If this bit is set, nonmatching lists [^...] do not match newline. + If not set, they do. */ +#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) + +/* If this bit is set, either \{...\} or {...} defines an + interval, depending on RE_NO_BK_BRACES. + If not set, \{, \}, {, and } are literals. */ +#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) + +/* If this bit is set, +, ? and | aren't recognized as operators. + If not set, they are. */ +#define RE_LIMITED_OPS (RE_INTERVALS << 1) + +/* If this bit is set, newline is an alternation operator. + If not set, newline is literal. */ +#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) + +/* If this bit is set, then `{...}' defines an interval, and \{ and \} + are literals. + If not set, then `\{...\}' defines an interval. */ +#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) + +/* If this bit is set, (...) defines a group, and \( and \) are literals. + If not set, \(...\) defines a group, and ( and ) are literals. */ +#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) + +/* If this bit is set, then \ matches . + If not set, then \ is a back-reference. */ +#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) + +/* If this bit is set, then | is an alternation operator, and \| is literal. + If not set, then \| is an alternation operator, and | is literal. */ +#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) + +/* If this bit is set, then an ending range point collating higher + than the starting range point, as in [z-a], is invalid. + If not set, then when ending range point collates higher than the + starting range point, the range is ignored. */ +#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) + +/* If this bit is set, then an unmatched ) is ordinary. + If not set, then an unmatched ) is invalid. */ +#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) + +/* This global variable defines the particular regexp syntax to use (for + some interfaces). When a regexp is compiled, the syntax used is + stored in the pattern buffer, so changing this does not affect + already-compiled regexps. */ +extern reg_syntax_t re_syntax_options; + +/* Define combinations of the above bits for the standard possibilities. + (The [[[ comments delimit what gets put into the Texinfo file, so + don't delete them!) */ +/* [[[begin syntaxes]]] */ +#define RE_SYNTAX_EMACS 0 + +#define RE_SYNTAX_AWK \ + (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ + | RE_UNMATCHED_RIGHT_PAREN_ORD) + +#define RE_SYNTAX_POSIX_AWK \ + (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) + +#define RE_SYNTAX_GREP \ + (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ + | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ + | RE_NEWLINE_ALT) + +#define RE_SYNTAX_EGREP \ + (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ + | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ + | RE_NO_BK_VBAR) + +#define RE_SYNTAX_POSIX_EGREP \ + (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) + +/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ +#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC + +#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC + +/* Syntax bits common to both basic and extended POSIX regex syntax. */ +#define _RE_SYNTAX_POSIX_COMMON \ + (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ + | RE_INTERVALS | RE_NO_EMPTY_RANGES) + +#define RE_SYNTAX_POSIX_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) + +/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes + RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this + isn't minimal, since other operators, such as \`, aren't disabled. */ +#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) + +#define RE_SYNTAX_POSIX_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_UNMATCHED_RIGHT_PAREN_ORD) + +/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS + replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ +#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) +/* [[[end syntaxes]]] */ + +/* Maximum number of duplicates an interval can allow. Some systems + (erroneously) define this in other header files, but we want our + value, so remove any previous define. */ +#ifdef RE_DUP_MAX +#undef RE_DUP_MAX +#endif +#define RE_DUP_MAX ((1 << 15) - 1) + + +/* POSIX `cflags' bits (i.e., information for `regcomp'). */ + +/* If this bit is set, then use extended regular expression syntax. + If not set, then use basic regular expression syntax. */ +#define REG_EXTENDED 1 + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +#define REG_ICASE (REG_EXTENDED << 1) + +/* If this bit is set, then anchors do not match at newline + characters in the string. + If not set, then anchors do match at newlines. */ +#define REG_NEWLINE (REG_ICASE << 1) + +/* If this bit is set, then report only success or fail in regexec. + If not set, then returns differ between not matching and errors. */ +#define REG_NOSUB (REG_NEWLINE << 1) + + +/* POSIX `eflags' bits (i.e., information for regexec). */ + +/* If this bit is set, then the beginning-of-line operator doesn't match + the beginning of the string (presumably because it's not the + beginning of a line). + If not set, then the beginning-of-line operator does match the + beginning of the string. */ +#define REG_NOTBOL 1 + +/* Like REG_NOTBOL, except for the end-of-line. */ +#define REG_NOTEOL (1 << 1) + + +/* If any error codes are removed, changed, or added, update the + `re_error_msg' table in regex.c. */ +typedef enum +{ + REG_NOERROR = 0, /* Success. */ + REG_NOMATCH, /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) */ + REG_BADPAT, /* Invalid pattern. */ + REG_ECOLLATE, /* Not implemented. */ + REG_ECTYPE, /* Invalid character class name. */ + REG_EESCAPE, /* Trailing backslash. */ + REG_ESUBREG, /* Invalid back reference. */ + REG_EBRACK, /* Unmatched left bracket. */ + REG_EPAREN, /* Parenthesis imbalance. */ + REG_EBRACE, /* Unmatched \{. */ + REG_BADBR, /* Invalid contents of \{\}. */ + REG_ERANGE, /* Invalid range end. */ + REG_ESPACE, /* Ran out of memory. */ + REG_BADRPT, /* No preceding re for repetition op. */ + + /* Error codes we've added. */ + REG_EEND, /* Premature end. */ + REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ + REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ +} reg_errcode_t; + +/* This data structure represents a compiled pattern. Before calling + the pattern compiler, the fields `buffer', `allocated', `fastmap', + `translate', and `no_sub' can be set. After the pattern has been + compiled, the `re_nsub' field is available. All other fields are + private to the regex routines. */ + +struct re_pattern_buffer +{ +/* [[[begin pattern_buffer]]] */ + /* Space that holds the compiled pattern. It is declared as + `unsigned char *' because its elements are + sometimes used as array indexes. */ + unsigned char *buffer; + + /* Number of bytes to which `buffer' points. */ + unsigned long allocated; + + /* Number of bytes actually used in `buffer'. */ + unsigned long used; + + /* Syntax setting with which the pattern was compiled. */ + reg_syntax_t syntax; + + /* Pointer to a fastmap, if any, otherwise zero. re_search uses + the fastmap, if there is one, to skip over impossible + starting points for matches. */ + char *fastmap; + + /* Either a translate table to apply to all characters before + comparing them, or zero for no translation. The translation + is applied to a pattern when it is compiled and to a string + when it is matched. */ + char *translate; + + /* Number of subexpressions found by the compiler. */ + size_t re_nsub; + + /* Zero if this pattern cannot match the empty string, one else. + Well, in truth it's used only in `re_search_2', to see + whether or not we should use the fastmap, so we don't set + this absolutely perfectly; see `re_compile_fastmap' (the + `duplicate' case). */ + unsigned can_be_null : 1; + + /* If REGS_UNALLOCATED, allocate space in the `regs' structure + for `max (RE_NREGS, re_nsub + 1)' groups. + If REGS_REALLOCATE, reallocate space if necessary. + If REGS_FIXED, use what's there. */ +#define REGS_UNALLOCATED 0 +#define REGS_REALLOCATE 1 +#define REGS_FIXED 2 + unsigned regs_allocated : 2; + + /* Set to zero when `regex_compile' compiles a pattern; set to one + by `re_compile_fastmap' if it updates the fastmap. */ + unsigned fastmap_accurate : 1; + + /* If set, `re_match_2' does not return information about + subexpressions. */ + unsigned no_sub : 1; + + /* If set, a beginning-of-line anchor doesn't match at the + beginning of the string. */ + unsigned not_bol : 1; + + /* Similarly for an end-of-line anchor. */ + unsigned not_eol : 1; + + /* If true, an anchor at a newline matches. */ + unsigned newline_anchor : 1; + +/* [[[end pattern_buffer]]] */ +}; + +typedef struct re_pattern_buffer regex_t; + + +/* search.c (search_buffer) in Emacs needs this one opcode value. It is + defined both in `regex.c' and here. */ +#define RE_EXACTN_VALUE 1 + +/* Type for byte offsets within the string. POSIX mandates this. */ +typedef int regoff_t; + + +/* This is the structure we store register match data in. See + regex.texinfo for a full description of what registers match. */ +struct re_registers +{ + unsigned num_regs; + regoff_t *start; + regoff_t *end; +}; + + +/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, + `re_match_2' returns information about at least this many registers + the first time a `regs' structure is passed. */ +#ifndef RE_NREGS +#define RE_NREGS 30 +#endif + + +/* POSIX specification for registers. Aside from the different names than + `re_registers', POSIX uses an array of structures, instead of a + structure of arrays. */ +typedef struct +{ + regoff_t rm_so; /* Byte offset from string's start to substring's start. */ + regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ +} regmatch_t; + +/* Declarations for routines. */ + +/* To avoid duplicating every routine declaration -- once with a + prototype (if we are ANSI), and once without (if we aren't) -- we + use the following macro to declare argument types. This + unfortunately clutters up the declarations a bit, but I think it's + worth it. */ + +#if __STDC__ + +#define _RE_ARGS(args) args + +#else /* not __STDC__ */ + +#define _RE_ARGS(args) () + +#endif /* not __STDC__ */ + +/* Sets the current default syntax to SYNTAX, and return the old syntax. + You can also simply assign to the `re_syntax_options' variable. */ +extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); + +/* Compile the regular expression PATTERN, with length LENGTH + and syntax given by the global `re_syntax_options', into the buffer + BUFFER. Return NULL if successful, and an error string if not. */ +extern const char *re_compile_pattern + _RE_ARGS ((const char *pattern, int length, + struct re_pattern_buffer *buffer)); + + +/* Compile a fastmap for the compiled pattern in BUFFER; used to + accelerate searches. Return 0 if successful and -2 if was an + internal error. */ +extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); + + +/* Search in the string STRING (with length LENGTH) for the pattern + compiled into BUFFER. Start searching at position START, for RANGE + characters. Return the starting position of the match, -1 for no + match, or -2 for an internal error. Also return register + information in REGS (if REGS and BUFFER->no_sub are nonzero). */ +extern int re_search + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, + int length, int start, int range, struct re_registers *regs)); + + +/* Like `re_search', but search in the concatenation of STRING1 and + STRING2. Also, stop searching at index START + STOP. */ +extern int re_search_2 + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, + int length1, const char *string2, int length2, + int start, int range, struct re_registers *regs, int stop)); + + +/* Like `re_search', but return how many characters in STRING the regexp + in BUFFER matched, starting at position START. */ +extern int re_match + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, + int length, int start, struct re_registers *regs)); + + +/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ +extern int re_match_2 + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, + int length1, const char *string2, int length2, + int start, struct re_registers *regs, int stop)); + + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using BUFFER and REGS will use this memory + for recording register information. STARTS and ENDS must be + allocated with malloc, and must each be at least `NUM_REGS * sizeof + (regoff_t)' bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ +extern void re_set_registers + _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, + unsigned num_regs, regoff_t *starts, regoff_t *ends)); + +/* 4.2 bsd compatibility. */ +extern char *re_comp _RE_ARGS ((const char *)); +extern int re_exec _RE_ARGS ((const char *)); + +/* POSIX compatibility. */ +extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags)); +extern int regexec + _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch, + regmatch_t pmatch[], int eflags)); +extern size_t regerror + _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf, + size_t errbuf_size)); +extern void regfree _RE_ARGS ((regex_t *preg)); + +#endif /* not __REGEXP_LIBRARY_H__ */ + +/* +Local variables: +make-backup-files: t +version-control: t +trim-versions-without-asking: nil +End: +*/ diff --git a/bin/ed/test/!1.d b/bin/ed/test/!1.d new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bin/ed/test/!1.err b/bin/ed/test/!1.err new file mode 100644 index 0000000000..630af9011c --- /dev/null +++ b/bin/ed/test/!1.err @@ -0,0 +1 @@ +.!date diff --git a/bin/ed/test/!1.r b/bin/ed/test/!1.r new file mode 100644 index 0000000000..dcf02b2fb6 --- /dev/null +++ b/bin/ed/test/!1.r @@ -0,0 +1 @@ +okay diff --git a/bin/ed/test/!1.t b/bin/ed/test/!1.t new file mode 100644 index 0000000000..d7b1fea1f7 --- /dev/null +++ b/bin/ed/test/!1.t @@ -0,0 +1,5 @@ +!read one +hello, world +a +okay +. diff --git a/bin/ed/test/!2.d b/bin/ed/test/!2.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/!2.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/!2.err b/bin/ed/test/!2.err new file mode 100644 index 0000000000..79d8956822 --- /dev/null +++ b/bin/ed/test/!2.err @@ -0,0 +1 @@ +!! diff --git a/bin/ed/test/!2.r b/bin/ed/test/!2.r new file mode 100644 index 0000000000..65f58a262a --- /dev/null +++ b/bin/ed/test/!2.r @@ -0,0 +1,4 @@ +line 1 +hello world! +line 4 +hello world diff --git a/bin/ed/test/!2.t b/bin/ed/test/!2.t new file mode 100644 index 0000000000..14c681d44a --- /dev/null +++ b/bin/ed/test/!2.t @@ -0,0 +1,2 @@ +.!echo hello world +2,3!echo hello world! diff --git a/bin/ed/test/=.err b/bin/ed/test/=.err new file mode 100644 index 0000000000..6a6055955b --- /dev/null +++ b/bin/ed/test/=.err @@ -0,0 +1 @@ +1,$= diff --git a/bin/ed/test/Makefile b/bin/ed/test/Makefile new file mode 100644 index 0000000000..750f388fe4 --- /dev/null +++ b/bin/ed/test/Makefile @@ -0,0 +1,17 @@ +ED= ../obj/ed + +all: build test + @echo done + +build: mkscripts.sh + @echo building test scripts... + @chmod +x mkscripts.sh + @mkscripts.sh ${ED} + +test: build ckscripts.sh + @echo running test scripts... + @chmod +x ckscripts.sh + @ckscripts.sh ${ED} + +clean: + rm -f *.ed *.[oz] *~ diff --git a/bin/ed/test/README b/bin/ed/test/README new file mode 100644 index 0000000000..46d4133503 --- /dev/null +++ b/bin/ed/test/README @@ -0,0 +1,41 @@ +The files in this directory with suffixes `.t', `.d', `.r' and `.err' are +used for testing ed. To run the tests, set the ED variable in the Makefile +for the path name of the program to be tested (e.g., /bin/ed), and type +`make'. The tests do not exhaustively verify POSIX compliance nor do +they verify correct 8-bit or long line support. + +The test file suffixes have the following meanings: +.t Template - a list of ed commands from which an ed script is + constructed +.d Data - read by an ed script +.r Result - the expected output after processing data via an ed + script. +.err Error - invalid ed commands that should generate an error + +The output of the tests is written to the two files err.o and scripts.o. +At the end of the tests, these files are grep'ed for error messages, +which look like: + *** The script u.ed exited abnormally *** +or: + *** Output u.o of script u.ed is incorrect *** + +It is assumed that the ed being tested processes escapes (\) in file names. +This is so that a name starting with bang (!) can be read, via: + r \!file +Without the escape, a POSIX ed would attempt to read the output of +the shell command `file'. If the ed being tested does not support escape +processing on file names, then the script `mkscripts.sh' should be modified +accordingly. + +The POSIX requirement that an address range not be used where at most +a single address is expected has been relaxed in this version of ed. +Therefore, the following scripts which test for compliance with this +POSIX rule exit abnormally: +=-err.ed +a1-err.ed +i1-err.ed +k1-err.ed +r1-err.ed + +In addition, one of !1-err.ed or !2.ed will fail, depending on whether or +not ed was compiled with the VI_BANG directive. diff --git a/bin/ed/test/TODO b/bin/ed/test/TODO new file mode 100644 index 0000000000..7a4b74fb74 --- /dev/null +++ b/bin/ed/test/TODO @@ -0,0 +1,15 @@ +Some missing tests: +0) g/./s^@^@ - okay: NULs in commands +1) g/./s/^@/ - okay: NULs in patterns +2) a + hello^V^Jworld + . - okay: embedded newlines in insert mode +3) ed "" - error: invalid filename +4) red .. - error: restricted +5) red / - error: restricted +5) red !xx - error: restricted +6) ed -x - verify: 8-bit clean +7) ed - verify: long-line support +8) ed - verify: interactive/help mode +9) G/pat/ - verify: global interactive command +10) V/pat/ - verify: global interactive command diff --git a/bin/ed/test/a.d b/bin/ed/test/a.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/a.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/a.r b/bin/ed/test/a.r new file mode 100644 index 0000000000..26257bd3b3 --- /dev/null +++ b/bin/ed/test/a.r @@ -0,0 +1,8 @@ +hello world +line 1 +hello world! +line 2 +line 3 +line 4 +line5 +hello world!! diff --git a/bin/ed/test/a.t b/bin/ed/test/a.t new file mode 100644 index 0000000000..ac98c40d08 --- /dev/null +++ b/bin/ed/test/a.t @@ -0,0 +1,9 @@ +0a +hello world +. +2a +hello world! +. +$a +hello world!! +. diff --git a/bin/ed/test/a1.err b/bin/ed/test/a1.err new file mode 100644 index 0000000000..e80815ff50 --- /dev/null +++ b/bin/ed/test/a1.err @@ -0,0 +1,3 @@ +1,$a +hello world +. diff --git a/bin/ed/test/a2.err b/bin/ed/test/a2.err new file mode 100644 index 0000000000..ec4b00b40c --- /dev/null +++ b/bin/ed/test/a2.err @@ -0,0 +1,3 @@ +aa +hello world +. diff --git a/bin/ed/test/addr1.err b/bin/ed/test/addr1.err new file mode 100644 index 0000000000..29d6383b52 --- /dev/null +++ b/bin/ed/test/addr1.err @@ -0,0 +1 @@ +100 diff --git a/bin/ed/test/addr2.err b/bin/ed/test/addr2.err new file mode 100644 index 0000000000..e96acb9254 --- /dev/null +++ b/bin/ed/test/addr2.err @@ -0,0 +1 @@ +-100 diff --git a/bin/ed/test/ascii.d b/bin/ed/test/ascii.d new file mode 100644 index 0000000000..c86626638e Binary files /dev/null and b/bin/ed/test/ascii.d differ diff --git a/bin/ed/test/ascii.r b/bin/ed/test/ascii.r new file mode 100644 index 0000000000..c86626638e Binary files /dev/null and b/bin/ed/test/ascii.r differ diff --git a/bin/ed/test/ascii.t b/bin/ed/test/ascii.t new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bin/ed/test/c.d b/bin/ed/test/c.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/c.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/c.r b/bin/ed/test/c.r new file mode 100644 index 0000000000..0fb3e4fffc --- /dev/null +++ b/bin/ed/test/c.r @@ -0,0 +1,4 @@ +at the top +between top/middle +in the middle +at the bottom diff --git a/bin/ed/test/c.t b/bin/ed/test/c.t new file mode 100644 index 0000000000..ebdd536f81 --- /dev/null +++ b/bin/ed/test/c.t @@ -0,0 +1,12 @@ +1c +at the top +. +4c +in the middle +. +$c +at the bottom +. +2,3c +between top/middle +. diff --git a/bin/ed/test/c1.err b/bin/ed/test/c1.err new file mode 100644 index 0000000000..658ec38b46 --- /dev/null +++ b/bin/ed/test/c1.err @@ -0,0 +1,3 @@ +cc +hello world +. diff --git a/bin/ed/test/c2.err b/bin/ed/test/c2.err new file mode 100644 index 0000000000..24b322776a --- /dev/null +++ b/bin/ed/test/c2.err @@ -0,0 +1,3 @@ +0c +hello world +. diff --git a/bin/ed/test/ckscripts.sh b/bin/ed/test/ckscripts.sh new file mode 100644 index 0000000000..87a7e9beb5 --- /dev/null +++ b/bin/ed/test/ckscripts.sh @@ -0,0 +1,37 @@ +#!/bin/sh - +# This script runs the .ed scripts generated by mkscripts.sh +# and compares their output against the .r files, which contain +# the correct output + +PATH="/bin:/usr/bin:/usr/local/bin/:." +ED=$1 +[ X"$ED" = X -o ! -x $ED ] && ED="../ed" +[ ! -x $ED ] && { echo "$ED: cannot execute"; exit 1; } + +# Run the *-err.ed scripts first, since these don't generate output; +# rename then to *-err.ed~; they exit with non-zero status +for i in *-err.ed; do + echo $i~ + if $i; then + echo "*** The script $i~ exited abnormally ***" + fi + mv $i $i~ +done >errs.o 2>&1 + +# Run the remainding scripts; they exit with zero status +for i in *.ed; do + base=`expr $i : '\([^.]*\)'` +# base=`echo $i | sed 's/\..*//'` +# base=`$ED - \!"echo \\\\$i" <<-EOF +# s/\..* +# EOF` + if $base.ed; then + if cmp -s $base.o $base.r; then :; else + echo "*** Output $base.o of script $i is incorrect ***" + fi + else + echo "*** The script $i exited abnormally ***" + fi +done >scripts.o 2>&1 + +grep -h '\*\*\*' errs.o scripts.o diff --git a/bin/ed/test/d.d b/bin/ed/test/d.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/d.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/d.err b/bin/ed/test/d.err new file mode 100644 index 0000000000..f03f6945fb --- /dev/null +++ b/bin/ed/test/d.err @@ -0,0 +1 @@ +dd diff --git a/bin/ed/test/d.r b/bin/ed/test/d.r new file mode 100644 index 0000000000..b7e242c00c --- /dev/null +++ b/bin/ed/test/d.r @@ -0,0 +1 @@ +line 2 diff --git a/bin/ed/test/d.t b/bin/ed/test/d.t new file mode 100644 index 0000000000..c7c473febd --- /dev/null +++ b/bin/ed/test/d.t @@ -0,0 +1,3 @@ +1d +2;+1d +$d diff --git a/bin/ed/test/e1.d b/bin/ed/test/e1.d new file mode 100644 index 0000000000..3b18e512db --- /dev/null +++ b/bin/ed/test/e1.d @@ -0,0 +1 @@ +hello world diff --git a/bin/ed/test/e1.err b/bin/ed/test/e1.err new file mode 100644 index 0000000000..827cc292b6 --- /dev/null +++ b/bin/ed/test/e1.err @@ -0,0 +1 @@ +ee e1.err diff --git a/bin/ed/test/e1.r b/bin/ed/test/e1.r new file mode 100644 index 0000000000..e656728bab --- /dev/null +++ b/bin/ed/test/e1.r @@ -0,0 +1 @@ +E e1.t diff --git a/bin/ed/test/e1.t b/bin/ed/test/e1.t new file mode 100644 index 0000000000..e656728bab --- /dev/null +++ b/bin/ed/test/e1.t @@ -0,0 +1 @@ +E e1.t diff --git a/bin/ed/test/e2.d b/bin/ed/test/e2.d new file mode 100644 index 0000000000..aa44630d22 --- /dev/null +++ b/bin/ed/test/e2.d @@ -0,0 +1 @@ +E !echo hello world- diff --git a/bin/ed/test/e2.err b/bin/ed/test/e2.err new file mode 100644 index 0000000000..779a64b54f --- /dev/null +++ b/bin/ed/test/e2.err @@ -0,0 +1 @@ +.e e2.err diff --git a/bin/ed/test/e2.r b/bin/ed/test/e2.r new file mode 100644 index 0000000000..59ebf11fd0 --- /dev/null +++ b/bin/ed/test/e2.r @@ -0,0 +1 @@ +hello world- diff --git a/bin/ed/test/e2.t b/bin/ed/test/e2.t new file mode 100644 index 0000000000..aa44630d22 --- /dev/null +++ b/bin/ed/test/e2.t @@ -0,0 +1 @@ +E !echo hello world- diff --git a/bin/ed/test/e3.d b/bin/ed/test/e3.d new file mode 100644 index 0000000000..aa44630d22 --- /dev/null +++ b/bin/ed/test/e3.d @@ -0,0 +1 @@ +E !echo hello world- diff --git a/bin/ed/test/e3.err b/bin/ed/test/e3.err new file mode 100644 index 0000000000..80a7fdcf92 --- /dev/null +++ b/bin/ed/test/e3.err @@ -0,0 +1 @@ +ee.err diff --git a/bin/ed/test/e3.r b/bin/ed/test/e3.r new file mode 100644 index 0000000000..aa44630d22 --- /dev/null +++ b/bin/ed/test/e3.r @@ -0,0 +1 @@ +E !echo hello world- diff --git a/bin/ed/test/e3.t b/bin/ed/test/e3.t new file mode 100644 index 0000000000..1c50726138 --- /dev/null +++ b/bin/ed/test/e3.t @@ -0,0 +1 @@ +E diff --git a/bin/ed/test/e4.d b/bin/ed/test/e4.d new file mode 100644 index 0000000000..aa44630d22 --- /dev/null +++ b/bin/ed/test/e4.d @@ -0,0 +1 @@ +E !echo hello world- diff --git a/bin/ed/test/e4.r b/bin/ed/test/e4.r new file mode 100644 index 0000000000..aa44630d22 --- /dev/null +++ b/bin/ed/test/e4.r @@ -0,0 +1 @@ +E !echo hello world- diff --git a/bin/ed/test/e4.t b/bin/ed/test/e4.t new file mode 100644 index 0000000000..d905d9da82 --- /dev/null +++ b/bin/ed/test/e4.t @@ -0,0 +1 @@ +e diff --git a/bin/ed/test/f1.err b/bin/ed/test/f1.err new file mode 100644 index 0000000000..e60975adab --- /dev/null +++ b/bin/ed/test/f1.err @@ -0,0 +1 @@ +.f f1.err diff --git a/bin/ed/test/f2.err b/bin/ed/test/f2.err new file mode 100644 index 0000000000..26d1c5e3a7 --- /dev/null +++ b/bin/ed/test/f2.err @@ -0,0 +1 @@ +ff1.err diff --git a/bin/ed/test/g1.d b/bin/ed/test/g1.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/g1.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/g1.err b/bin/ed/test/g1.err new file mode 100644 index 0000000000..f95ea22232 --- /dev/null +++ b/bin/ed/test/g1.err @@ -0,0 +1 @@ +g/./s //x/ diff --git a/bin/ed/test/g1.r b/bin/ed/test/g1.r new file mode 100644 index 0000000000..578a44b6b2 --- /dev/null +++ b/bin/ed/test/g1.r @@ -0,0 +1,15 @@ +line5 +help! world +order +line 4 +help! world +order +line 3 +help! world +order +line 2 +help! world +order +line 1 +help! world +order diff --git a/bin/ed/test/g1.t b/bin/ed/test/g1.t new file mode 100644 index 0000000000..2d0b54f35a --- /dev/null +++ b/bin/ed/test/g1.t @@ -0,0 +1,6 @@ +g/./m0 +g/./s/$/\ +hello world +g/hello /s/lo/p!/\ +a\ +order diff --git a/bin/ed/test/g2.d b/bin/ed/test/g2.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/g2.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/g2.err b/bin/ed/test/g2.err new file mode 100644 index 0000000000..0ff6a5a601 --- /dev/null +++ b/bin/ed/test/g2.err @@ -0,0 +1 @@ +g//s/./x/ diff --git a/bin/ed/test/g2.r b/bin/ed/test/g2.r new file mode 100644 index 0000000000..3b18e512db --- /dev/null +++ b/bin/ed/test/g2.r @@ -0,0 +1 @@ +hello world diff --git a/bin/ed/test/g2.t b/bin/ed/test/g2.t new file mode 100644 index 0000000000..831ee8367b --- /dev/null +++ b/bin/ed/test/g2.t @@ -0,0 +1,2 @@ +g/[2-4]/-1,+1c\ +hello world diff --git a/bin/ed/test/g3.err b/bin/ed/test/g3.err new file mode 100644 index 0000000000..01058d844a --- /dev/null +++ b/bin/ed/test/g3.err @@ -0,0 +1 @@ +g diff --git a/bin/ed/test/h.err b/bin/ed/test/h.err new file mode 100644 index 0000000000..a71e506f6d --- /dev/null +++ b/bin/ed/test/h.err @@ -0,0 +1 @@ +.h diff --git a/bin/ed/test/i.d b/bin/ed/test/i.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/i.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/i.r b/bin/ed/test/i.r new file mode 100644 index 0000000000..5f27af09c0 --- /dev/null +++ b/bin/ed/test/i.r @@ -0,0 +1,8 @@ +hello world +hello world! +line 1 +line 2 +line 3 +line 4 +hello world!! +line5 diff --git a/bin/ed/test/i.t b/bin/ed/test/i.t new file mode 100644 index 0000000000..d1d98057d8 --- /dev/null +++ b/bin/ed/test/i.t @@ -0,0 +1,9 @@ +1i +hello world +. +2i +hello world! +. +$i +hello world!! +. diff --git a/bin/ed/test/i1.err b/bin/ed/test/i1.err new file mode 100644 index 0000000000..aaddede259 --- /dev/null +++ b/bin/ed/test/i1.err @@ -0,0 +1,3 @@ +1,$i +hello world +. diff --git a/bin/ed/test/i2.err b/bin/ed/test/i2.err new file mode 100644 index 0000000000..b63f5ac507 --- /dev/null +++ b/bin/ed/test/i2.err @@ -0,0 +1,3 @@ +ii +hello world +. diff --git a/bin/ed/test/i3.err b/bin/ed/test/i3.err new file mode 100644 index 0000000000..6d200c8c97 --- /dev/null +++ b/bin/ed/test/i3.err @@ -0,0 +1,3 @@ +0i +hello world +. diff --git a/bin/ed/test/j.d b/bin/ed/test/j.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/j.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/j.r b/bin/ed/test/j.r new file mode 100644 index 0000000000..66f36a8f8a --- /dev/null +++ b/bin/ed/test/j.r @@ -0,0 +1,4 @@ +line 1 +line 2line 3 +line 4 +line5 diff --git a/bin/ed/test/j.t b/bin/ed/test/j.t new file mode 100644 index 0000000000..9b5d28ddf1 --- /dev/null +++ b/bin/ed/test/j.t @@ -0,0 +1,2 @@ +1,1j +2,3j diff --git a/bin/ed/test/k.d b/bin/ed/test/k.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/k.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/k.r b/bin/ed/test/k.r new file mode 100644 index 0000000000..eeb38db20c --- /dev/null +++ b/bin/ed/test/k.r @@ -0,0 +1,5 @@ +line 3 +hello world +line 4 +line5 +line 2 diff --git a/bin/ed/test/k.t b/bin/ed/test/k.t new file mode 100644 index 0000000000..53d588dd07 --- /dev/null +++ b/bin/ed/test/k.t @@ -0,0 +1,10 @@ +2ka +1d +'am$ +1ka +0a +hello world +. +'ad +u +'am0 diff --git a/bin/ed/test/k1.err b/bin/ed/test/k1.err new file mode 100644 index 0000000000..eba1f3d8ff --- /dev/null +++ b/bin/ed/test/k1.err @@ -0,0 +1 @@ +1,$ka diff --git a/bin/ed/test/k2.err b/bin/ed/test/k2.err new file mode 100644 index 0000000000..b34a18d519 --- /dev/null +++ b/bin/ed/test/k2.err @@ -0,0 +1 @@ +kA diff --git a/bin/ed/test/k3.err b/bin/ed/test/k3.err new file mode 100644 index 0000000000..70190c473d --- /dev/null +++ b/bin/ed/test/k3.err @@ -0,0 +1 @@ +0ka diff --git a/bin/ed/test/k4.err b/bin/ed/test/k4.err new file mode 100644 index 0000000000..3457642229 --- /dev/null +++ b/bin/ed/test/k4.err @@ -0,0 +1,6 @@ +a +hello +. +.ka +'ad +'ap diff --git a/bin/ed/test/l.d b/bin/ed/test/l.d new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bin/ed/test/l.r b/bin/ed/test/l.r new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bin/ed/test/l.t b/bin/ed/test/l.t new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bin/ed/test/m.d b/bin/ed/test/m.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/m.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/m.err b/bin/ed/test/m.err new file mode 100644 index 0000000000..3aec4c32c1 --- /dev/null +++ b/bin/ed/test/m.err @@ -0,0 +1,4 @@ +a +hello world +. +1,$m1 diff --git a/bin/ed/test/m.r b/bin/ed/test/m.r new file mode 100644 index 0000000000..186cf5403b --- /dev/null +++ b/bin/ed/test/m.r @@ -0,0 +1,5 @@ +line5 +line 1 +line 2 +line 3 +line 4 diff --git a/bin/ed/test/m.t b/bin/ed/test/m.t new file mode 100644 index 0000000000..c39c088724 --- /dev/null +++ b/bin/ed/test/m.t @@ -0,0 +1,7 @@ +1,2m$ +1,2m$ +1,2m$ +$m0 +$m0 +2,3m1 +2,3m3 diff --git a/bin/ed/test/mkscripts.sh b/bin/ed/test/mkscripts.sh new file mode 100644 index 0000000000..724db0cc05 --- /dev/null +++ b/bin/ed/test/mkscripts.sh @@ -0,0 +1,71 @@ +#!/bin/sh - +# This script generates ed test scripts (.ed) from .t files + +PATH="/bin:/usr/bin:/usr/local/bin/:." +ED=$1 +[ X"$ED" = X -o ! -x $ED ] && ED="../ed" +[ ! -x $ED ] && { echo "$ED: cannot execute"; exit 1; } + +for i in *.t; do +# base=${i%.*} +# base=`echo $i | sed 's/\..*//'` + base=`expr $i : '\([^.]*\)'` + ( + echo "#!/bin/sh -" + echo "$ED - <<\EOT" + echo "r \\$base.d" + cat $i + echo "w \\$base.o" + echo EOT + ) >$base.ed + chmod +x $base.ed +# The following is pretty ugly and not appropriate use of ed +# but the point is that it can be done... +# base=`$ED - \!"echo \\\\$i" <<-EOF +# s/\..* +# EOF` +# $ED - <<-EOF +# a +# #!/bin/sh - +# $ED - <<\EOT +# r \\$base.d +# w \\$base.o +# EOT +# . +# -2r \\$i +# w \\$base.ed +# !chmod +x \\$base.ed +# EOF +done + +for i in *.err; do +# base=${i%.*} +# base=`echo $i | sed 's/\..*//'` + base=`expr $i : '\([^.]*\)'` + ( + echo "#!/bin/sh -" + echo "$ED - <<\EOT" + echo H + echo "r \\$base.err" + cat $i + echo "w \\$base.o" + echo EOT + ) >$base-err.ed + chmod +x $base-err.ed +# base=`$ED - \!"echo \\\\$i" <<-EOF +# s/\..* +# EOF` +# $ED - <<-EOF +# a +# #!/bin/sh - +# $ED - <<\EOT +# H +# r \\$base.err +# w \\$base.o +# EOT +# . +# -2r \\$i +# w \\${base}-err.ed +# !chmod +x ${base}-err.ed +# EOF +done diff --git a/bin/ed/test/n.d b/bin/ed/test/n.d new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bin/ed/test/n.r b/bin/ed/test/n.r new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bin/ed/test/n.t b/bin/ed/test/n.t new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bin/ed/test/p.d b/bin/ed/test/p.d new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bin/ed/test/p.r b/bin/ed/test/p.r new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bin/ed/test/p.t b/bin/ed/test/p.t new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bin/ed/test/q.d b/bin/ed/test/q.d new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bin/ed/test/q.r b/bin/ed/test/q.r new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bin/ed/test/q.t b/bin/ed/test/q.t new file mode 100644 index 0000000000..123a2c8e2c --- /dev/null +++ b/bin/ed/test/q.t @@ -0,0 +1,5 @@ +w q.o +a +hello +. +q diff --git a/bin/ed/test/q1.err b/bin/ed/test/q1.err new file mode 100644 index 0000000000..0a7e178d20 --- /dev/null +++ b/bin/ed/test/q1.err @@ -0,0 +1 @@ +.q diff --git a/bin/ed/test/r1.d b/bin/ed/test/r1.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/r1.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/r1.err b/bin/ed/test/r1.err new file mode 100644 index 0000000000..269aa7cbcb --- /dev/null +++ b/bin/ed/test/r1.err @@ -0,0 +1 @@ +1,$r r1.err diff --git a/bin/ed/test/r1.r b/bin/ed/test/r1.r new file mode 100644 index 0000000000..a3ff506ec7 --- /dev/null +++ b/bin/ed/test/r1.r @@ -0,0 +1,7 @@ +line 1 +hello world +line 2 +line 3 +line 4 +line5 +hello world diff --git a/bin/ed/test/r1.t b/bin/ed/test/r1.t new file mode 100644 index 0000000000..d787a923e3 --- /dev/null +++ b/bin/ed/test/r1.t @@ -0,0 +1,3 @@ +1;r !echo hello world +1 +r !echo hello world diff --git a/bin/ed/test/r2.d b/bin/ed/test/r2.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/r2.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/r2.err b/bin/ed/test/r2.err new file mode 100644 index 0000000000..1c44fa3ea9 --- /dev/null +++ b/bin/ed/test/r2.err @@ -0,0 +1 @@ +r a-good-book diff --git a/bin/ed/test/r2.r b/bin/ed/test/r2.r new file mode 100644 index 0000000000..ac152ba9d0 --- /dev/null +++ b/bin/ed/test/r2.r @@ -0,0 +1,10 @@ +line 1 +line 2 +line 3 +line 4 +line5 +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/r2.t b/bin/ed/test/r2.t new file mode 100644 index 0000000000..4286f428e3 --- /dev/null +++ b/bin/ed/test/r2.t @@ -0,0 +1 @@ +r diff --git a/bin/ed/test/r3.d b/bin/ed/test/r3.d new file mode 100644 index 0000000000..593eec6192 --- /dev/null +++ b/bin/ed/test/r3.d @@ -0,0 +1 @@ +r r3.t diff --git a/bin/ed/test/r3.r b/bin/ed/test/r3.r new file mode 100644 index 0000000000..86d5f904fc --- /dev/null +++ b/bin/ed/test/r3.r @@ -0,0 +1,2 @@ +r r3.t +r r3.t diff --git a/bin/ed/test/r3.t b/bin/ed/test/r3.t new file mode 100644 index 0000000000..593eec6192 --- /dev/null +++ b/bin/ed/test/r3.t @@ -0,0 +1 @@ +r r3.t diff --git a/bin/ed/test/s1.d b/bin/ed/test/s1.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/s1.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/s1.err b/bin/ed/test/s1.err new file mode 100644 index 0000000000..d7ca0cf480 --- /dev/null +++ b/bin/ed/test/s1.err @@ -0,0 +1 @@ +s . x diff --git a/bin/ed/test/s1.r b/bin/ed/test/s1.r new file mode 100644 index 0000000000..4eb0980cfe --- /dev/null +++ b/bin/ed/test/s1.r @@ -0,0 +1,5 @@ +liene 1 +(liene) (2) +(liene) (3) +liene (4) +(()liene5) diff --git a/bin/ed/test/s1.t b/bin/ed/test/s1.t new file mode 100644 index 0000000000..b0028bb6c9 --- /dev/null +++ b/bin/ed/test/s1.t @@ -0,0 +1,6 @@ +s/\([^ ][^ ]*\)/(\1)/g +2s +/3/s +/\(4\)/sr +/\(.\)/srg +%s/i/&e/ diff --git a/bin/ed/test/s10.err b/bin/ed/test/s10.err new file mode 100644 index 0000000000..0d8d83de19 --- /dev/null +++ b/bin/ed/test/s10.err @@ -0,0 +1,4 @@ +a +hello +. +s/[h[.]/x/ diff --git a/bin/ed/test/s2.d b/bin/ed/test/s2.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/s2.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/s2.err b/bin/ed/test/s2.err new file mode 100644 index 0000000000..b5c851df7b --- /dev/null +++ b/bin/ed/test/s2.err @@ -0,0 +1,4 @@ +a +a +. +s/x*/a/g diff --git a/bin/ed/test/s2.r b/bin/ed/test/s2.r new file mode 100644 index 0000000000..ca305c8d50 --- /dev/null +++ b/bin/ed/test/s2.r @@ -0,0 +1,5 @@ +li(n)e 1 +i(n)e 200 +li(n)e 3 +li(n)e 4 +li(n)e500 diff --git a/bin/ed/test/s2.t b/bin/ed/test/s2.t new file mode 100644 index 0000000000..f36584997c --- /dev/null +++ b/bin/ed/test/s2.t @@ -0,0 +1,4 @@ +,s/./(&)/3 +s/$/00 +2s//%/g +s/^l diff --git a/bin/ed/test/s3.d b/bin/ed/test/s3.d new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bin/ed/test/s3.err b/bin/ed/test/s3.err new file mode 100644 index 0000000000..d68c7d07f9 --- /dev/null +++ b/bin/ed/test/s3.err @@ -0,0 +1 @@ +s/[xyx/a/ diff --git a/bin/ed/test/s3.r b/bin/ed/test/s3.r new file mode 100644 index 0000000000..d6cada2212 --- /dev/null +++ b/bin/ed/test/s3.r @@ -0,0 +1 @@ +hello world diff --git a/bin/ed/test/s3.t b/bin/ed/test/s3.t new file mode 100644 index 0000000000..fbf880304b --- /dev/null +++ b/bin/ed/test/s3.t @@ -0,0 +1,6 @@ +a +hello/[]world +. +s/[/]/ / +s/[[:digit:][]/ / +s/[]]/ / diff --git a/bin/ed/test/s4.err b/bin/ed/test/s4.err new file mode 100644 index 0000000000..35b609fc62 --- /dev/null +++ b/bin/ed/test/s4.err @@ -0,0 +1 @@ +s/\a\b\c/xyz/ diff --git a/bin/ed/test/s5.err b/bin/ed/test/s5.err new file mode 100644 index 0000000000..89104c5523 --- /dev/null +++ b/bin/ed/test/s5.err @@ -0,0 +1 @@ +s//xyz/ diff --git a/bin/ed/test/s6.err b/bin/ed/test/s6.err new file mode 100644 index 0000000000..b4785957bc --- /dev/null +++ b/bin/ed/test/s6.err @@ -0,0 +1 @@ +s diff --git a/bin/ed/test/s7.err b/bin/ed/test/s7.err new file mode 100644 index 0000000000..30ba4fded7 --- /dev/null +++ b/bin/ed/test/s7.err @@ -0,0 +1,5 @@ +a +hello world +. +/./ +sr diff --git a/bin/ed/test/s8.err b/bin/ed/test/s8.err new file mode 100644 index 0000000000..5665767c3f --- /dev/null +++ b/bin/ed/test/s8.err @@ -0,0 +1,4 @@ +a +hello +. +s/[h[=]/x/ diff --git a/bin/ed/test/s9.err b/bin/ed/test/s9.err new file mode 100644 index 0000000000..1ff16dd847 --- /dev/null +++ b/bin/ed/test/s9.err @@ -0,0 +1,4 @@ +a +hello +. +s/[h[:]/x/ diff --git a/bin/ed/test/t.d b/bin/ed/test/t.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/t.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/t.r b/bin/ed/test/t.r new file mode 100644 index 0000000000..2b2854758d --- /dev/null +++ b/bin/ed/test/t.r @@ -0,0 +1,16 @@ +line 1 +line 1 +line 1 +line 2 +line 2 +line 3 +line 4 +line5 +line 1 +line 1 +line 1 +line 2 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/t1.d b/bin/ed/test/t1.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/t1.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/t1.err b/bin/ed/test/t1.err new file mode 100644 index 0000000000..c49c556e0e --- /dev/null +++ b/bin/ed/test/t1.err @@ -0,0 +1 @@ +tt diff --git a/bin/ed/test/t1.r b/bin/ed/test/t1.r new file mode 100644 index 0000000000..2b2854758d --- /dev/null +++ b/bin/ed/test/t1.r @@ -0,0 +1,16 @@ +line 1 +line 1 +line 1 +line 2 +line 2 +line 3 +line 4 +line5 +line 1 +line 1 +line 1 +line 2 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/t1.t b/bin/ed/test/t1.t new file mode 100644 index 0000000000..6b66163793 --- /dev/null +++ b/bin/ed/test/t1.t @@ -0,0 +1,3 @@ +1t0 +2,3t2 +,t$ diff --git a/bin/ed/test/t2.d b/bin/ed/test/t2.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/t2.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/t2.err b/bin/ed/test/t2.err new file mode 100644 index 0000000000..c202051b7f --- /dev/null +++ b/bin/ed/test/t2.err @@ -0,0 +1 @@ +t0;-1 diff --git a/bin/ed/test/t2.r b/bin/ed/test/t2.r new file mode 100644 index 0000000000..0c75ff554c --- /dev/null +++ b/bin/ed/test/t2.r @@ -0,0 +1,6 @@ +line 1 +line5 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/t2.t b/bin/ed/test/t2.t new file mode 100644 index 0000000000..5175abdec9 --- /dev/null +++ b/bin/ed/test/t2.t @@ -0,0 +1 @@ +t0;/./ diff --git a/bin/ed/test/u.d b/bin/ed/test/u.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/u.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/u.err b/bin/ed/test/u.err new file mode 100644 index 0000000000..caa1ba1148 --- /dev/null +++ b/bin/ed/test/u.err @@ -0,0 +1 @@ +.u diff --git a/bin/ed/test/u.r b/bin/ed/test/u.r new file mode 100644 index 0000000000..ad558d82d0 --- /dev/null +++ b/bin/ed/test/u.r @@ -0,0 +1,9 @@ +line 1 +hello +hello world!! +line 2 +line 3 +line 4 +line5 +hello +hello world!! diff --git a/bin/ed/test/u.t b/bin/ed/test/u.t new file mode 100644 index 0000000000..131cb6e25c --- /dev/null +++ b/bin/ed/test/u.t @@ -0,0 +1,31 @@ +1;r u.t +u +a +hello +world +. +g/./s//x/\ +a\ +hello\ +world +u +u +u +a +hello world! +. +u +1,$d +u +2,3d +u +c +hello world!! +. +u +u +-1;.,+1j +u +u +u +.,+1t$ diff --git a/bin/ed/test/v.d b/bin/ed/test/v.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/v.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/v.r b/bin/ed/test/v.r new file mode 100644 index 0000000000..714db63e35 --- /dev/null +++ b/bin/ed/test/v.r @@ -0,0 +1,11 @@ +line5 +order +hello world +line 1 +order +line 2 +order +line 3 +order +line 4 +order diff --git a/bin/ed/test/v.t b/bin/ed/test/v.t new file mode 100644 index 0000000000..608a77fb6a --- /dev/null +++ b/bin/ed/test/v.t @@ -0,0 +1,6 @@ +v/[ ]/m0 +v/[ ]/s/$/\ +hello world +v/hello /s/lo/p!/\ +a\ +order diff --git a/bin/ed/test/w.d b/bin/ed/test/w.d new file mode 100644 index 0000000000..92f337e977 --- /dev/null +++ b/bin/ed/test/w.d @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/w.r b/bin/ed/test/w.r new file mode 100644 index 0000000000..ac152ba9d0 --- /dev/null +++ b/bin/ed/test/w.r @@ -0,0 +1,10 @@ +line 1 +line 2 +line 3 +line 4 +line5 +line 1 +line 2 +line 3 +line 4 +line5 diff --git a/bin/ed/test/w.t b/bin/ed/test/w.t new file mode 100644 index 0000000000..c2e18bd2f6 --- /dev/null +++ b/bin/ed/test/w.t @@ -0,0 +1,2 @@ +w !cat >\!.z +r \!.z diff --git a/bin/ed/test/w1.err b/bin/ed/test/w1.err new file mode 100644 index 0000000000..e2c8a603f7 --- /dev/null +++ b/bin/ed/test/w1.err @@ -0,0 +1 @@ +w /to/some/far-away/place diff --git a/bin/ed/test/w2.err b/bin/ed/test/w2.err new file mode 100644 index 0000000000..9daf89cfa7 --- /dev/null +++ b/bin/ed/test/w2.err @@ -0,0 +1 @@ +ww.o diff --git a/bin/ed/test/w3.err b/bin/ed/test/w3.err new file mode 100644 index 0000000000..39bbf4c95b --- /dev/null +++ b/bin/ed/test/w3.err @@ -0,0 +1 @@ +wqp w.o diff --git a/bin/ed/test/x.err b/bin/ed/test/x.err new file mode 100644 index 0000000000..0953f01dd0 --- /dev/null +++ b/bin/ed/test/x.err @@ -0,0 +1 @@ +.x diff --git a/bin/ed/test/z.err b/bin/ed/test/z.err new file mode 100644 index 0000000000..6a51a2d583 --- /dev/null +++ b/bin/ed/test/z.err @@ -0,0 +1,2 @@ +z +z