* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
/* SCCS identification string recording the file name, revision and date. */
static char sccsid[] = "@(#)ex_subst.c 8.43 (Berkeley) 4/12/94";
/*
 * Flag bits handed to substitute():
 *	SUB_FIRST	a regular expression was given with the command, so
 *			the r flag is not reasonable.
 *	SUB_MUSTSETR	the r flag is required.
 */
#define	SUB_FIRST	0x01
#define	SUB_MUSTSETR	0x02
/*
 * Forward declarations for the file-local helpers.  The __P() wrapper keeps
 * the parameter lists optional for pre-ANSI compilers.
 */
static int checkmatchsize __P((SCR *, regex_t *));
static inline int regsub __P((SCR *, char *, char **, size_t *, size_t *));
static int substitute __P((SCR *, EXF *, EXCMDARG *, char *, regex_t *, u_int));
* [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
* Substitute on lines matching a pattern.
/*
 * ex_substitute --
 *	Entry point for the full ex substitute command (pattern and
 *	replacement supplied by the user).
 *
 * NOTE(review): this listing of the function is fragmentary -- braces,
 * several declarations and a number of statements are not visible -- so
 * the summary below is limited to what the visible lines establish.
 *
 * Arguments: sp, ep and cmdp are passed straight through to the helpers;
 * the command text is read from cmdp->argv[0]->bp / cmdp->argv[0]->len.
 *
 * Visible behavior:
 *	- Walks the first argument with p and len.
 *	- On some paths returns ex_subagain(sp, ep, cmdp), or
 *	  substitute(sp, ep, cmdp, p, &sp->subre, SUB_MUSTSETR).
 *	- Clears sp->c_suffix and sp->g_suffix.
 *	- Tests S_SRE_SET on sp and reports an error through msgq() when it
 *	  is not set.
 *	- Tests the O_EXTENDED and O_IGNORECASE options (presumably to
 *	  build reflags -- the assignments are not visible; TODO confirm).
 *	- Passes the pattern through re_conv() and compiles it with
 *	  regcomp() into lre; re_error() receives the regcomp() result.
 *	- For the replacement text: frees sp->repl when the replacement is
 *	  empty, treats a lone % specially, otherwise assembles the text in
 *	  a scratch buffer obtained with GET_SPACE_RET(), expanding ~ from
 *	  the previous sp->repl when O_MAGIC is set, then copies the result
 *	  into a freshly malloc()ed sp->repl and releases the scratch
 *	  buffer with FREE_SPACE().
 *	- Calls checkmatchsize(sp, &sp->subre) and finishes by returning
 *	  substitute(sp, ep, cmdp, p, re, flags).
 */
ex_substitute(sp
, ep
, cmdp
)
int delim
, eval
, reflags
, replaced
;
char *bp
, *ptrn
, *rep
, *p
, *t
;
* Skip leading white space.
* Historic vi allowed any non-alphanumeric to serve as the
* substitution command delimiter.
* If the arguments are empty, it's the same as &, i.e. we
* repeat the last substitution.
for (p
= cmdp
->argv
[0]->bp
,
len
= cmdp
->argv
[0]->len
; len
> 0; --len
, ++p
) {
return (ex_subagain(sp
, ep
, cmdp
));
return (substitute(sp
, ep
,
cmdp
, p
, &sp
->subre
, SUB_MUSTSETR
));
* The full-blown substitute command reset the remembered
* state of the 'c' and 'g' suffices.
sp
->c_suffix
= sp
->g_suffix
= 0;
* Get the pattern string, toss escaped characters.
* Historic vi accepted any of the following forms:
* :s/abc/def/ change "abc" to "def"
* :s/abc/def change "abc" to "def"
* Only toss an escape character if it escapes a delimiter.
* This means that "s/A/\\\\f" replaces "A" with "\\f". It
* would be nice to be more regular, i.e. for each layer of
* escaping a single escape character is removed, but that's
* not how the historic vi worked.
if (p
[0] == '\0' || p
[0] == delim
) {
* Nul terminate the pattern string -- it's passed
* to regcomp which doesn't understand anything else.
* If the pattern string is empty, use the last RE (not just the
if (!F_ISSET(sp
, S_SRE_SET
)) {
msgq(sp
, M_ERR
, "No previous regular expression.");
if (O_ISSET(sp
, O_EXTENDED
))
if (O_ISSET(sp
, O_IGNORECASE
))
/* Convert vi-style RE's to POSIX 1003.2 RE's. */
if (re_conv(sp
, &ptrn
, &replaced
))
eval
= regcomp(&lre
, (char *)ptrn
, reflags
);
/* Free up any allocated memory. */
re_error(sp
, eval
, &lre
);
* Historic practice is that substitutes set the search
* direction as well as both substitute and search RE's.
* Get the replacement string.
* The special character & (\& if O_MAGIC not set) matches the
* entire RE. No handling of & is required here, it's done by
* The special character ~ (\~ if O_MAGIC not set) inserts the
* previous replacement string into this replacement string.
* Count ~'s to figure out how much space we need. We could
* special case nonexistent last patterns or whether or not
* O_MAGIC is set, but it's probably not worth the effort.
* Only toss an escape character if it escapes a delimiter or
* if O_MAGIC is set and it escapes a tilde.
* If the entire replacement pattern is "%", then use the last
* replacement pattern. This semantic was added to vi in System
* V and then percolated elsewhere, presumably around the time
* that it was added to their version of ed(1).
if (p
[0] == '\0' || p
[0] == delim
) {
FREE(sp
->repl
, sp
->repl_len
);
} else if (p
[0] == '%' && (p
[1] == '\0' || p
[1] == delim
)) {
if (p
[1] == delim
&& p
[2] != '\0')
p
[0] != '\0' && p
[0] != delim
; ++p
, ++len
)
GET_SPACE_RET(sp
, bp
, blen
, len
);
for (t
= bp
, len
= 0, p
= rep
;;) {
if (p
[0] == '\0' || p
[0] == delim
) {
} else if (p
[1] == '~') {
if (!O_ISSET(sp
, O_MAGIC
))
} else if (p
[0] == '~' && O_ISSET(sp
, O_MAGIC
)) {
memmove(t
, sp
->repl
, sp
->repl_len
);
FREE(sp
->repl
, sp
->repl_len
);
if ((sp
->repl
= malloc(len
)) == NULL
) {
msgq(sp
, M_SYSERR
, NULL
);
FREE_SPACE(sp
, bp
, blen
);
memmove(sp
->repl
, bp
, len
);
FREE_SPACE(sp
, bp
, blen
);
if (checkmatchsize(sp
, &sp
->subre
))
return (substitute(sp
, ep
, cmdp
, p
, re
, flags
));
* [line [,line]] & [cgr] [count] [#lp]
* Substitute using the last substitute RE and replacement pattern.
/*
 * ex_subagain --
 *	Repeat a substitution using the saved substitute RE (sp->subre).
 *
 * When S_SUBRE_SET is not set on sp an error is queued through msgq().
 * The visible return hands the remaining command text
 * (cmdp->argv[0]->bp) to substitute() with &sp->subre and a flags value
 * of 0.
 *
 * NOTE(review): the braces and the error-path return are not visible in
 * this listing; presumably the function returns non-zero after the
 * message -- confirm against the full source.
 */
ex_subagain(sp
, ep
, cmdp
)
if (!F_ISSET(sp
, S_SUBRE_SET
)) {
msgq(sp
, M_ERR
, "No previous regular expression.");
return (substitute(sp
, ep
, cmdp
, cmdp
->argv
[0]->bp
, &sp
->subre
, 0));
* [line [,line]] ~ [cgr] [count] [#lp]
* Substitute using the last RE and last substitute replacement pattern.
/*
 * ex_subtilde --
 *	Repeat a substitution using the saved search RE (sp->sre) rather
 *	than the saved substitute RE.
 *
 * When S_SRE_SET is not set on sp an error is queued through msgq().
 * The visible return hands the remaining command text
 * (cmdp->argv[0]->bp) to substitute() with &sp->sre and a flags value
 * of 0.
 *
 * NOTE(review): the braces and the error-path return are not visible in
 * this listing; presumably the function returns non-zero after the
 * message -- confirm against the full source.
 */
ex_subtilde(sp
, ep
, cmdp
)
if (!F_ISSET(sp
, S_SRE_SET
)) {
msgq(sp
, M_ERR
, "No previous regular expression.");
return (substitute(sp
, ep
, cmdp
, cmdp
->argv
[0]->bp
, &sp
->sre
, 0));
* The nasty part of the substitution is what happens when the replacement
* string contains newlines. It's a bit tricky -- consider the information
* that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is
* to build a set of newline offsets which we use to break the line up later,
* when the replacement is done. Don't change it unless you're pretty damned
* confident.
/*
 * Buffer management macros used while building a substituted line.
 *
 * NEEDNEWLINE(sp)
 *	When the newline offset array is full (sp->newl_len equals
 *	sp->newl_cnt) it is resized with REALLOC() to sp->newl_len
 *	elements of size_t, and the result is tested against NULL.
 * BUILD(sp, l, len)
 *	Appends len bytes from l at lb + lbclen.  When lbclen + len would
 *	exceed lblen the build buffer lb is first grown by
 *	MAX(lbclen + len, 256) bytes through REALLOC().
 * NEEDSP(sp, len, pnt)
 *	Performs the same capacity check and growth as BUILD.
 *
 * BUILD and NEEDSP refer to lb, lbclen and lblen by name, so they can only
 * be expanded where variables with those names are in scope.
 *
 * NOTE(review): the macro bodies are truncated in this listing (closing
 * braces, the failure handling and any length bookkeeping are not
 * visible); confirm the details against the full source.
 */
#define NEEDNEWLINE(sp) { \
if (sp->newl_len == sp->newl_cnt) { \
REALLOC(sp, sp->newl, size_t *, \
sp->newl_len * sizeof(size_t)); \
if (sp->newl == NULL) { \
#define BUILD(sp, l, len) { \
if (lbclen + (len) > lblen) { \
lblen += MAX(lbclen + (len), 256); \
REALLOC(sp, lb, char *, lblen); \
memmove(lb + lbclen, l, len); \
#define NEEDSP(sp, len, pnt) { \
if (lbclen + (len) > lblen) { \
lblen += MAX(lbclen + (len), 256); \
REALLOC(sp, lb, char *, lblen); \
* Do the substitution. This stuff is *really* tricky. There are
* lots of special cases, and general nastiness. Don't mess with it
* unless you're pretty confident.
/*
 * substitute --
 *	Shared worker for the substitute commands: parses the trailing
 *	flags and count in s, then applies the compiled expression re and
 *	the saved replacement to each line in the command address range.
 *
 * NOTE(review): this listing of the function is fragmentary -- braces,
 * the switch header, several declarations and many statements are not
 * visible -- so the summary below is limited to what the visible lines
 * establish.
 *
 * Arguments as used in the visible code:
 *	sp	screen; holds c_suffix, g_suffix, match[], newl[], rptlines.
 *	ep	file handle passed to file_gline/file_iline/file_sline.
 *	cmdp	command; addr1.lno and addr2.lno bound the line range.
 *	s	remaining command text (flags and count); later reused as
 *		the pointer to the current line text.
 *	re	compiled expression; re->re_nsub + 1 match slots are used.
 *	flags	SUB_FIRST and SUB_MUSTSETR bits, tested through LF_ISSET().
 *
 * Visible behavior:
 *	- Clears sp->c_suffix and sp->g_suffix unless O_EDCOMPATIBLE is set.
 *	- A digit in s is parsed with strtoul() as a count; the range then
 *	  becomes addr2.lno through addr2.lno + count - 1.
 *	- sp->c_suffix and sp->g_suffix are toggled by the flag parser.
 *	- The usage message is issued when text remains in s, or when the
 *	  r flag is missing while SUB_MUSTSETR is set.
 *	- In vi mode the c suffix cannot be combined with the #, l or p
 *	  flags.
 *	- Loops lno from addr1.lno to elno, stopping early on quit or when
 *	  S_INTERRUPTED is set on sp.
 *	- Each line is fetched with file_gline(); text before a match is
 *	  copied with BUILD(), the replacement is produced by regsub(),
 *	  and with the c suffix each match is confirmed through
 *	  sp->s_confirm().
 *	- Lines created by newlines in the replacement are inserted with
 *	  file_iline() (advancing both lno and elno); the final piece is
 *	  stored with file_sline() and sp->rptlines[L_CHANGED] is bumped.
 *	- Changed lines are shown through ex_print() when l, # or p was
 *	  given; otherwise EX_AUTOPRINT is set on EXP(sp).
 *	- The scratch line buffer is released with FREE_SPACE().
 *
 * NOTE(review): the count error checks compare the strtoul() result with
 * LONG_MIN (and presumably LONG_MAX); strtoul() signals overflow with
 * ULONG_MAX, so confirm these branches are reachable as intended.
 * NOTE(review): from.cno adds offset to rm_so while to.cno uses rm_eo
 * without offset; confirm whether the asymmetry is intended.
 * NOTE(review): the usage test mixes || and && without parentheses and so
 * relies on && binding tighter than ||.
 */
substitute(sp
, ep
, cmdp
, s
, re
, flags
)
size_t blen
, cnt
, last
, lbclen
, lblen
, len
, llen
, offset
, saved_offset
;
int cflag
, lflag
, nflag
, pflag
, rflag
;
int didsub
, do_eol_match
, eflags
, empty_ok
, eval
;
int linechanged
, matched
, quit
, rval
, teardown
;
* Historically, the 'g' and 'c' suffices were always toggled as flags,
* so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was
* not set, they were initialized to 0 for all substitute commands. If
* O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
* specified substitute/replacement patterns (see ex_substitute()).
if (!O_ISSET(sp
, O_EDCOMPATIBLE
))
sp
->c_suffix
= sp
->g_suffix
= 0;
* Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
* it only displayed the last change. I'd disallow them, but they are
* useful in combination with the [v]global commands. In the current
* model the problem is combining them with the 'c' flag -- the screen
* would have to flip back and forth between the confirm screen and the
* ex print screen, which would be pretty awful. We do display all
* changes, though, for what that's worth.
* Historic vi was fairly strict about the order of "options", the
* count, and "flags". I'm somewhat fuzzy on the difference between
* options and flags, anyway, so this is a simpler approach, and we
* just take it them in whatever order the user gives them. (The ex
* usage statement doesn't reflect this.)
cflag
= lflag
= nflag
= pflag
= rflag
= 0;
for (lno
= OOBLNO
; *s
!= '\0'; ++s
)
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
lno
= strtoul(s
, &s
, 10);
if (*s
== '\0') /* Loop increment correction. */
msgq(sp
, M_ERR
, "Count overflow.");
else if (lno
== LONG_MIN
)
msgq(sp
, M_ERR
, "Count underflow.");
msgq(sp
, M_SYSERR
, NULL
);
* In historic vi, the count was inclusive from the
cmdp
->addr1
.lno
= cmdp
->addr2
.lno
;
cmdp
->addr2
.lno
+= lno
- 1;
sp
->c_suffix
= !sp
->c_suffix
;
sp
->g_suffix
= !sp
->g_suffix
;
if (LF_ISSET(SUB_FIRST
)) {
"Regular expression specified; r flag meaningless.");
if (!F_ISSET(sp
, S_SRE_SET
)) {
"No previous regular expression.");
if (*s
!= '\0' || !rflag
&& LF_ISSET(SUB_MUSTSETR
)) {
usage
: msgq(sp
, M_ERR
, "Usage: %s", cmdp
->cmd
->usage
);
if (IN_VI_MODE(sp
) && sp
->c_suffix
&& (lflag
|| nflag
|| pflag
)) {
"The #, l and p flags may not be combined with the c flag in vi mode.");
teardown
= !intr_init(sp
);
* bp: if interactive, line cache
* blen: if interactive, line cache length
* lb: build buffer pointer.
* lbclen: current length of built buffer.
* lblen; length of build buffer.
blen
= lbclen
= lblen
= 0;
for (matched
= quit
= 0, lno
= cmdp
->addr1
.lno
,
elno
= cmdp
->addr2
.lno
; !quit
&& lno
<= elno
; ++lno
) {
/* Someone's unhappy, time to stop. */
if (F_ISSET(sp
, S_INTERRUPTED
)) {
if (!F_ISSET(sp
, S_GLOBAL
))
msgq(sp
, M_INFO
, "Interrupted.");
if ((s
= file_gline(sp
, ep
, lno
, &llen
)) == NULL
) {
* Make a local copy if doing confirmation -- when calling
* the confirm routine we're likely to lose the cached copy.
GET_SPACE_RET(sp
, bp
, blen
, llen
);
ADD_SPACE_RET(sp
, bp
, blen
, llen
);
/* Start searching from the beginning. */
/* Reset the build buffer offset. */
/* Reset empty match flag. */
* We don't want to have to do a setline if the line didn't
* change -- keep track of whether or not this line changed.
* If doing confirmations, don't want to keep setting the
* line if change is refused -- keep track of substitutions.
didsub
= linechanged
= 0;
/* New line, do an EOL match. */
/* It's not nul terminated, but we pretend it is. */
* The search area is from s + offset to the EOL.
* Generally, sp->match[0].rm_so is the offset of the start
* of the match from the start of the search, and offset is
* the offset of the start of the last search.
nextmatch
: sp
->match
[0].rm_so
= 0;
sp
->match
[0].rm_eo
= len
;
/* Get the next match. */
(char *)s
+ offset
, re
->re_nsub
+ 1, sp
->match
, eflags
);
* There wasn't a match or if there was an error, deal with
* it. If there was a previous match in this line, resolve
* the changes into the database. Otherwise, just move on.
/* Only the first search can match an anchored expression. */
* It's possible to match 0-length strings -- for example, the
* command s;a*;X;, when matched against the string "aabb" will
* result in "XbXbX", i.e. the matches are "aa", the space
* between the b's and the space between the b's and the end of
* the string. There is a similar space between the beginning
* of the string and the a's. The rule that we use (because vi
* historically used it) is that any 0-length match, occurring
* immediately after a match, is ignored. Otherwise, the above
* example would have resulted in "XXbXbX". Another example is
* incorrectly using " *" to replace groups of spaces with one
* The way we do this is that if we just had a successful match,
* the starting offset does not skip characters, and the match
* is empty, ignore the match and move forward. If there's no
* more characters in the string, we were attempting to match
* after the last character, so quit.
sp
->match
[0].rm_so
== 0 && sp
->match
[0].rm_eo
== 0) {
* Set the cursor position for confirmation. Note,
* if we matched on a '$', the cursor may be past
* We may want to "fix" this in the confirm routine,
* if the confirm routine should be able to display
from
.cno
= sp
->match
[0].rm_so
+ offset
;
to
.cno
= sp
->match
[0].rm_eo
;
switch (sp
->s_confirm(sp
, ep
, &from
, &to
)) {
BUILD(sp
, s
+offset
, sp
->match
[0].rm_eo
);
/* If interruptible, pass the info back. */
if (F_ISSET(sp
, S_INTERRUPTIBLE
))
F_SET(sp
, S_INTERRUPTED
);
* If any changes, resolve them, otherwise
* return to the main loop.
/* Copy the bytes before the match into the build buffer. */
BUILD(sp
, s
+ offset
, sp
->match
[0].rm_so
);
/* Substitute the matching bytes. */
if (regsub(sp
, s
+ offset
, &lb
, &lbclen
, &lblen
))
/* Set the change flag so we know this line was modified. */
/* Move past the matched bytes. */
skip
: offset
+= sp
->match
[0].rm_eo
;
len
-= sp
->match
[0].rm_eo
;
/* A match cannot be followed by an empty pattern. */
* If doing a global change with confirmation, we have to
* update the screen. The basic idea is to store the line
* so the screen update routines can find it, and restart.
if (didsub
&& sp
->c_suffix
&& sp
->g_suffix
) {
* The new search offset will be the end of the
/* Copy the rest of the line. */
BUILD(sp
, s
+ offset
, len
)
/* Set the new offset. */
/* Store inserted lines, adjusting the build buffer. */
cnt
< sp
->newl_cnt
; ++cnt
, ++lno
, ++elno
) {
if (file_iline(sp
, ep
, lno
,
lb
+ last
, sp
->newl
[cnt
] - last
))
last
= sp
->newl
[cnt
] + 1;
/* Store and retrieve the line. */
if (file_sline(sp
, ep
, lno
, lb
+ last
, lbclen
))
if ((s
= file_gline(sp
, ep
, lno
, &llen
)) == NULL
) {
ADD_SPACE_RET(sp
, bp
, blen
, llen
)
* If we haven't already done the after-the-string
* match, do one. Set REG_NOTEOL so the '$' pattern
* If at the end of the string, do a test for the after
* the string match. Set REG_NOTEOL so the '$' pattern
if (sp
->g_suffix
&& do_eol_match
) {
endmatch
: if (!linechanged
)
/* Copy any remaining bytes into the build buffer. */
BUILD(sp
, s
+ offset
, len
)
/* Store inserted lines, adjusting the build buffer. */
cnt
< sp
->newl_cnt
; ++cnt
, ++lno
, ++elno
) {
lno
, lb
+ last
, sp
->newl
[cnt
] - last
))
last
= sp
->newl
[cnt
] + 1;
/* Store the changed line. */
if (file_sline(sp
, ep
, lno
, lb
+ last
, lbclen
))
/* Update changed line counter. */
++sp
->rptlines
[L_CHANGED
];
* Display as necessary. Historic practice is to only
* display the last line of a line split into multiple
if (lflag
|| nflag
|| pflag
) {
ex_print(sp
, ep
, &from
, &to
, E_F_LIST
);
ex_print(sp
, ep
, &from
, &to
, E_F_HASH
);
ex_print(sp
, ep
, &from
, &to
, E_F_PRINT
);
* Move the cursor to the last line changed.
* Move the cursor to the first non-blank of the last line
(void)nonblank(sp
, ep
, sp
->lno
, &sp
->cno
);
* If not in a global command, and nothing matched, say so.
* Else, if none of the lines displayed, put something up.
if (!F_ISSET(sp
, S_GLOBAL
))
msgq(sp
, M_INFO
, "No match found.");
} else if (!lflag
&& !nflag
&& !pflag
)
F_SET(EXP(sp
), EX_AUTOPRINT
);
FREE_SPACE(sp
, bp
, blen
);
* Do the substitution for a regular expression.
/*
 * regsub --
 *	Expand the saved replacement text (sp->repl, sp->repl_len) for the
 *	current match and append the result to the build buffer.
 *
 * NOTE(review): this listing of the function is fragmentary -- braces,
 * the switch header, the character-append macro header and several
 * statements are not visible -- so the summary below is limited to what
 * the visible lines establish.
 *
 * Arguments as used in the visible code:
 *	sp		screen; supplies repl, repl_len, match[] and newl[].
 *	ip		input line; matched text is read from
 *			ip + sp->match[no].rm_so.
 *	lbp		address of the build buffer pointer; read into the
 *			local lb on entry and written back before returning.
 *	lbclenp,
 *	lblenp		presumably the current and allocated build buffer
 *			lengths, mirrored in the locals lbclen and lblen --
 *			the copies in and out are not visible; TODO confirm.
 *
 * Visible behavior:
 *	- Walks the replacement with rp and rpl, writing at lb + lbclen.
 *	- The O_MAGIC option is consulted in two places while scanning.
 *	- A digit selects subexpression number no; the slot is tested for
 *	  rm_so or rm_eo equal to -1 before mlen bytes are copied from the
 *	  input line.
 *	- conv tracks a pending case conversion (C_NOTSET, C_LOWER,
 *	  C_ONELOWER, C_ONEUPPER, C_UPPER).
 *	- When KEY_VAL() classifies a character as K_CR or K_NL the
 *	  current lbclen is recorded in sp->newl[] and sp->newl_cnt is
 *	  incremented, marking where the built line is split later.
 */
regsub(sp
, ip
, lbp
, lbclenp
, lblenp
)
char *ip
; /* Input line. */
size_t *lbclenp
, *lblenp
;
enum { C_NOTSET
, C_LOWER
, C_ONELOWER
, C_ONEUPPER
, C_UPPER
} conv
;
size_t lbclen
, lblen
; /* Local copies. */
size_t mlen
; /* Match length. */
size_t rpl
; /* Remaining replacement length. */
char *rp
; /* Replacement pointer. */
int no
; /* Match replacement offset. */
char *p
, *t
; /* Buffer pointers. */
char *lb
; /* Local copies. */
lb
= *lbp
; /* Get local copies. */
* There are some special sequences that vi provides in the
* & string the RE matched (\& if nomagic set)
* \# n-th regular subexpression
* \E end \U, \L conversion
* \e end \U, \L conversion
* \l convert the next character to lower-case
* \L convert to lower-case, until \E, \e, or end of replacement
* \u convert the next character to upper-case
* \U convert to upper-case, until \E, \e, or end of replacement
* Otherwise, since this is the lowest level of replacement, discard
* all escape characters. This (hopefully) follows historic practice.
u_int __value = KEY_VAL(sp, __ch); \
if (__value == K_CR || __value == K_NL) { \
sp->newl[sp->newl_cnt++] = lbclen; \
} else if (conv != C_NOTSET) { \
for (rp
= sp
->repl
, rpl
= sp
->repl_len
, p
= lb
+ lbclen
; rpl
--;) {
if (O_ISSET(sp
, O_MAGIC
)) {
if (!O_ISSET(sp
, O_MAGIC
)) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
subzero
: if (sp
->match
[no
].rm_so
== -1 ||
sp
->match
[no
].rm_eo
== -1)
sp
->match
[no
].rm_eo
- sp
->match
[no
].rm_so
;
for (t
= ip
+ sp
->match
[no
].rm_so
; mlen
--; ++t
)
*lbp
= lb
; /* Update caller's information. */
/* Build nsub array as necessary. */
if (sp
->matchsize
< re
->re_nsub
+ 1) {
sp
->matchsize
= re
->re_nsub
+ 1;
regmatch_t
*, sp
->matchsize
* sizeof(regmatch_t
));