change the replacement algorithm to only replace when:
[unix-history] / usr / src / usr.bin / sed / process.c
CommitLineData
2b932e6d
KB
1/*-
2 * Copyright (c) 1992 Diomidis Spinellis.
36646721 3 * Copyright (c) 1992, 1993, 1994
fceb5f61 4 * The Regents of the University of California. All rights reserved.
2b932e6d
KB
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Diomidis Spinellis of Imperial College, University of London.
8 *
9 * %sccs.include.redist.c%
10 */
11
12#ifndef lint
b436b07a 13static char sccsid[] = "@(#)process.c 8.5 (Berkeley) %G%";
2b932e6d
KB
14#endif /* not lint */
15
16#include <sys/types.h>
17#include <sys/stat.h>
18#include <sys/ioctl.h>
19#include <sys/uio.h>
20
21#include <ctype.h>
22#include <errno.h>
23#include <fcntl.h>
24#include <limits.h>
25#include <regex.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29#include <unistd.h>
30
31#include "defs.h"
32#include "extern.h"
33
2b932e6d
KB
34static SPACE HS, PS, SS;
35#define pd PS.deleted
36#define ps PS.space
37#define psl PS.len
38#define hs HS.space
39#define hsl HS.len
40
41static inline int applies __P((struct s_command *));
2b932e6d
KB
42static void flush_appends __P((void));
43static void lputs __P((char *));
662b4df1 44static inline int regexec_e __P((regex_t *, const char *, int, int, size_t));
be479f2c 45static void regsub __P((SPACE *, char *, char *));
2b932e6d
KB
46static int substitute __P((struct s_command *));
47
48struct s_appends *appends; /* Array of pointers to strings to append. */
49static int appendx; /* Index into appends array. */
50int appendnum; /* Size of appends array. */
51
52static int lastaddr; /* Set by applies if last address of a range. */
53static int sdone; /* If any substitutes since last line input. */
54 /* Iov structure for 'w' commands. */
f3e85330 55static regex_t *defpreg;
be479f2c 56size_t maxnsub;
eb9bb612 57regmatch_t *match;
662b4df1 58
eb9bb612 59#define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); }
f3e85330 60
2b932e6d
KB
61void
62process()
63{
64 struct s_command *cp;
65 SPACE tspace;
66 size_t len;
be479f2c 67 int r;
2b932e6d
KB
68 char oldc, *p;
69
be479f2c 70 for (linenum = 0; mf_fgets(&PS, REPLACE);) {
2b932e6d
KB
71 pd = 0;
72 cp = prog;
73redirect:
74 while (cp != NULL) {
75 if (!applies(cp)) {
76 cp = cp->next;
77 continue;
78 }
79 switch (cp->code) {
80 case '{':
81 cp = cp->u.c;
82 goto redirect;
83 case 'a':
84 if (appendx >= appendnum)
85 appends = xrealloc(appends,
86 sizeof(struct s_appends) *
87 (appendnum *= 2));
88 appends[appendx].type = AP_STRING;
89 appends[appendx].s = cp->t;
662b4df1 90 appends[appendx].len = strlen(cp->t);
2b932e6d
KB
91 appendx++;
92 break;
93 case 'b':
94 cp = cp->u.c;
95 goto redirect;
96 case 'c':
97 pd = 1;
98 psl = 0;
99 if (cp->a2 == NULL || lastaddr)
100 (void)printf("%s", cp->t);
101 break;
102 case 'd':
bbb5d6cb 103 pd = 1;
2b932e6d
KB
104 goto new;
105 case 'D':
106 if (pd)
107 goto new;
c4948374 108 if ((p = memchr(ps, '\n', psl)) == NULL)
bbb5d6cb
KB
109 pd = 1;
110 else {
bfdff192 111 psl -= (p - ps) + 1;
2b932e6d
KB
112 memmove(ps, p + 1, psl);
113 }
114 goto new;
115 case 'g':
be479f2c 116 cspace(&PS, hs, hsl, REPLACE);
2b932e6d
KB
117 break;
118 case 'G':
eb9bb612 119 cspace(&PS, hs, hsl, 0);
2b932e6d
KB
120 break;
121 case 'h':
be479f2c 122 cspace(&HS, ps, psl, REPLACE);
2b932e6d
KB
123 break;
124 case 'H':
eb9bb612 125 cspace(&HS, ps, psl, 0);
2b932e6d
KB
126 break;
127 case 'i':
128 (void)printf("%s", cp->t);
129 break;
130 case 'l':
131 lputs(ps);
132 break;
133 case 'n':
134 if (!nflag && !pd)
662b4df1 135 OUT(ps)
2b932e6d 136 flush_appends();
be479f2c 137 r = mf_fgets(&PS, REPLACE);
2b932e6d 138#ifdef HISTORIC_PRACTICE
be479f2c 139 if (!r)
2b932e6d
KB
140 exit(0);
141#endif
142 pd = 0;
143 break;
144 case 'N':
145 flush_appends();
eb9bb612 146 if (!mf_fgets(&PS, 0)) {
2b932e6d 147 if (!nflag && !pd)
662b4df1 148 OUT(ps)
2b932e6d
KB
149 exit(0);
150 }
2b932e6d
KB
151 break;
152 case 'p':
153 if (pd)
154 break;
662b4df1 155 OUT(ps)
2b932e6d
KB
156 break;
157 case 'P':
158 if (pd)
159 break;
c4948374 160 if ((p = memchr(ps, '\n', psl)) != NULL) {
2b932e6d
KB
161 oldc = *p;
162 *p = '\0';
163 }
662b4df1 164 OUT(ps)
2b932e6d
KB
165 if (p != NULL)
166 *p = oldc;
167 break;
168 case 'q':
169 if (!nflag && !pd)
662b4df1 170 OUT(ps)
2b932e6d
KB
171 flush_appends();
172 exit(0);
173 case 'r':
174 if (appendx >= appendnum)
175 appends = xrealloc(appends,
176 sizeof(struct s_appends) *
177 (appendnum *= 2));
178 appends[appendx].type = AP_FILE;
179 appends[appendx].s = cp->t;
662b4df1 180 appends[appendx].len = strlen(cp->t);
2b932e6d
KB
181 appendx++;
182 break;
183 case 's':
a597182f 184 sdone |= substitute(cp);
2b932e6d
KB
185 break;
186 case 't':
187 if (sdone) {
188 sdone = 0;
189 cp = cp->u.c;
190 goto redirect;
191 }
192 break;
193 case 'w':
194 if (pd)
195 break;
196 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
197 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
198 DEFFILEMODE)) == -1)
199 err(FATAL, "%s: %s\n",
200 cp->t, strerror(errno));
eb9bb612 201 if (write(cp->u.fd, ps, psl) != psl)
2b932e6d
KB
202 err(FATAL, "%s: %s\n",
203 cp->t, strerror(errno));
204 break;
205 case 'x':
d44d58bb
KB
206 if (hs == NULL)
207 cspace(&HS, "", 0, REPLACE);
2b932e6d
KB
208 tspace = PS;
209 PS = HS;
210 HS = tspace;
211 break;
212 case 'y':
213 if (pd)
214 break;
eb9bb612 215 for (p = ps, len = psl; --len; ++p)
2b932e6d
KB
216 *p = cp->u.y[*p];
217 break;
218 case ':':
219 case '}':
220 break;
221 case '=':
222 (void)printf("%lu\n", linenum);
223 }
224 cp = cp->next;
225 } /* for all cp */
226
227new: if (!nflag && !pd)
662b4df1 228 OUT(ps)
2b932e6d
KB
229 flush_appends();
230 } /* for all lines */
231}
232
f3e85330
KB
/*
 * TRUE if the address passed matches the current program state
 * (lastline, linenumber, ps).
 *
 * An AT_RE address is tested against the pattern space with regexec_e(),
 * an AT_LINE address against linenum, and any other address type yields
 * lastline.  The whole expansion is parenthesized so the macro can be
 * used safely inside a larger expression.
 */
#define	MATCH(a)							\
	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline)
240
2b932e6d
KB
241/*
242 * Return TRUE if the command applies to the current line. Sets the inrange
243 * flag to process ranges. Interprets the non-select (``!'') flag.
244 */
245static inline int
246applies(cp)
247 struct s_command *cp;
248{
249 int r;
250
251 lastaddr = 0;
252 if (cp->a1 == NULL && cp->a2 == NULL)
253 r = 1;
254 else if (cp->a2)
255 if (cp->inrange) {
f3e85330 256 if (MATCH(cp->a2)) {
2b932e6d
KB
257 cp->inrange = 0;
258 lastaddr = 1;
259 }
260 r = 1;
f3e85330 261 } else if (MATCH(cp->a1)) {
2b932e6d
KB
262 /*
263 * If the second address is a number less than or
264 * equal to the line number first selected, only
265 * one line shall be selected.
266 * -- POSIX 1003.2
267 */
268 if (cp->a2->type == AT_LINE &&
269 linenum >= cp->a2->u.l)
270 lastaddr = 1;
271 else
272 cp->inrange = 1;
273 r = 1;
274 } else
275 r = 0;
276 else
f3e85330 277 r = MATCH(cp->a1);
2b932e6d
KB
278 return (cp->nonsel ? ! r : r);
279}
280
2b932e6d
KB
281/*
282 * substitute --
283 * Do substitutions in the pattern space. Currently, we build a
284 * copy of the new pattern space in the substitute space structure
285 * and then swap them.
286 */
287static int
288substitute(cp)
289 struct s_command *cp;
290{
291 SPACE tspace;
f3e85330 292 regex_t *re;
662b4df1 293 size_t re_off, slen;
b436b07a 294 int lastempty, n;
c64b9ea5 295 char *s;
2b932e6d
KB
296
297 s = ps;
f3e85330
KB
298 re = cp->u.s->re;
299 if (re == NULL) {
be479f2c 300 if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
f3e85330
KB
301 linenum = cp->u.s->linenum;
302 err(COMPILE, "\\%d not defined in the RE",
303 cp->u.s->maxbref);
304 }
be479f2c 305 }
662b4df1 306 if (!regexec_e(re, s, 0, 0, psl))
2b932e6d
KB
307 return (0);
308
b436b07a
KB
309 SS.len = 0; /* Clean substitute space. */
310 slen = psl;
311 n = cp->u.s->n;
312 lastempty = 1;
313
314 switch (n) {
315 case 0: /* Global */
316 do {
317 if (lastempty || match[0].rm_so != match[0].rm_eo) {
318 /* Locate start of replaced string. */
319 re_off = match[0].rm_so;
320 /* Copy leading retained string. */
321 cspace(&SS, s, re_off, APPEND);
322 /* Add in regular expression. */
323 regsub(&SS, s, cp->u.s->new);
324 }
a3a33365 325
b436b07a
KB
326 /* Move past this match. */
327 if (match[0].rm_so != match[0].rm_eo) {
328 s += match[0].rm_eo;
329 slen -= match[0].rm_eo;
330 lastempty = 0;
331 } else {
332 if (match[0].rm_so == 0)
333 cspace(&SS,
334 s, match[0].rm_so + 1, APPEND);
335 else
336 cspace(&SS,
337 s + match[0].rm_so, 1, APPEND);
338 s += match[0].rm_so + 1;
339 slen -= match[0].rm_so + 1;
340 lastempty = 1;
341 }
342 } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
2b932e6d 343 /* Copy trailing retained string. */
b436b07a
KB
344 if (slen > 0)
345 cspace(&SS, s, slen, APPEND);
346 break;
2b932e6d
KB
347 default: /* Nth occurrence */
348 while (--n) {
be479f2c 349 s += match[0].rm_eo;
662b4df1
EA
350 slen -= match[0].rm_eo;
351 if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
2b932e6d
KB
352 return (0);
353 }
354 /* FALLTHROUGH */
355 case 1: /* 1st occurrence */
356 /* Locate start of replaced string. */
be479f2c 357 re_off = match[0].rm_so + (s - ps);
2b932e6d 358 /* Copy leading retained string. */
be479f2c 359 cspace(&SS, ps, re_off, APPEND);
2b932e6d 360 /* Add in regular expression. */
be479f2c 361 regsub(&SS, s, cp->u.s->new);
2b932e6d 362 /* Copy trailing retained string. */
be479f2c 363 s += match[0].rm_eo;
662b4df1
EA
364 slen -= match[0].rm_eo;
365 cspace(&SS, s, slen, APPEND);
2b932e6d
KB
366 break;
367 }
368
369 /*
370 * Swap the substitute space and the pattern space, and make sure
371 * that any leftover pointers into stdio memory get lost.
372 */
373 tspace = PS;
374 PS = SS;
375 SS = tspace;
376 SS.space = SS.back;
377
378 /* Handle the 'p' flag. */
379 if (cp->u.s->p)
662b4df1 380 OUT(ps)
2b932e6d
KB
381
382 /* Handle the 'w' flag. */
383 if (cp->u.s->wfile && !pd) {
384 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
385 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
386 err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
eb9bb612 387 if (write(cp->u.s->wfd, ps, psl) != psl)
2b932e6d
KB
388 err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
389 }
390 return (1);
391}
392
393/*
394 * Flush append requests. Always called before reading a line,
395 * therefore it also resets the substitution done (sdone) flag.
396 */
397static void
398flush_appends()
399{
400 FILE *f;
401 int count, i;
402 char buf[8 * 1024];
403
404 for (i = 0; i < appendx; i++)
405 switch (appends[i].type) {
406 case AP_STRING:
662b4df1
EA
407 fwrite(appends[i].s, sizeof(char), appends[i].len,
408 stdout);
2b932e6d
KB
409 break;
410 case AP_FILE:
411 /*
412 * Read files probably shouldn't be cached. Since
413 * it's not an error to read a non-existent file,
414 * it's possible that another program is interacting
415 * with the sed script through the file system. It
416 * would be truly bizarre, but possible. It's probably
417 * not that big a performance win, anyhow.
418 */
419 if ((f = fopen(appends[i].s, "r")) == NULL)
420 break;
eb9bb612 421 while (count = fread(buf, sizeof(char), sizeof(buf), f))
662b4df1 422 (void)fwrite(buf, sizeof(char), count, stdout);
2b932e6d
KB
423 (void)fclose(f);
424 break;
425 }
426 if (ferror(stdout))
427 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
a597182f 428 appendx = sdone = 0;
2b932e6d
KB
429}
430
431static void
432lputs(s)
433 register char *s;
434{
435 register int count;
436 register char *escapes, *p;
437 struct winsize win;
438 static int termwidth = -1;
439
440 if (termwidth == -1)
441 if (p = getenv("COLUMNS"))
442 termwidth = atoi(p);
443 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
444 win.ws_col > 0)
445 termwidth = win.ws_col;
446 else
447 termwidth = 60;
448
449 for (count = 0; *s; ++s) {
450 if (count >= termwidth) {
451 (void)printf("\\\n");
452 count = 0;
453 }
454 if (isascii(*s) && isprint(*s) && *s != '\\') {
455 (void)putchar(*s);
456 count++;
457 } else {
458 escapes = "\\\a\b\f\n\r\t\v";
459 (void)putchar('\\');
460 if (p = strchr(escapes, *s)) {
461 (void)putchar("\\abfnrtv"[p - escapes]);
462 count += 2;
463 } else {
eb9bb612 464 (void)printf("%03o", *(u_char *)s);
2b932e6d
KB
465 count += 4;
466 }
467 }
468 }
469 (void)putchar('$');
470 (void)putchar('\n');
471 if (ferror(stdout))
472 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
473}
474
f3e85330 475static inline int
662b4df1 476regexec_e(preg, string, eflags, nomatch, slen)
2b932e6d
KB
477 regex_t *preg;
478 const char *string;
be479f2c 479 int eflags, nomatch;
662b4df1 480 size_t slen;
2b932e6d
KB
481{
482 int eval;
662b4df1 483
f3e85330
KB
484 if (preg == NULL) {
485 if (defpreg == NULL)
486 err(FATAL, "first RE may not be empty");
be479f2c 487 } else
f3e85330 488 defpreg = preg;
f3e85330 489
6e2c3434
CT
490 /* Set anchors, discounting trailing newline (if any). */
491 if (slen > 0 && string[slen - 1] == '\n')
492 slen--;
eb9bb612 493 match[0].rm_so = 0;
6e2c3434 494 match[0].rm_eo = slen;
eb9bb612 495
be479f2c 496 eval = regexec(defpreg, string,
eb9bb612 497 nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
f3e85330 498 switch(eval) {
2b932e6d 499 case 0:
f3e85330 500 return (1);
2b932e6d 501 case REG_NOMATCH:
f3e85330 502 return (0);
2b932e6d 503 }
f3e85330 504 err(FATAL, "RE error: %s", strregerror(eval, defpreg));
2b932e6d
KB
505 /* NOTREACHED */
506}
507
508/*
509 * regsub - perform substitutions after a regexp match
510 * Based on a routine by Henry Spencer
511 */
512static void
be479f2c 513regsub(sp, string, src)
2b932e6d 514 SPACE *sp;
be479f2c 515 char *string, *src;
2b932e6d
KB
516{
517 register int len, no;
518 register char c, *dst;
519
520#define NEEDSP(reqlen) \
521 if (sp->len >= sp->blen - (reqlen) - 1) { \
522 sp->blen += (reqlen) + 1024; \
523 sp->space = sp->back = xrealloc(sp->back, sp->blen); \
524 dst = sp->space + sp->len; \
525 }
526
527 dst = sp->space + sp->len;
528 while ((c = *src++) != '\0') {
529 if (c == '&')
530 no = 0;
531 else if (c == '\\' && isdigit(*src))
532 no = *src++ - '0';
533 else
534 no = -1;
535 if (no < 0) { /* Ordinary character. */
536 if (c == '\\' && (*src == '\\' || *src == '&'))
537 c = *src++;
538 NEEDSP(1);
539 *dst++ = c;
540 ++sp->len;
be479f2c
KB
541 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
542 len = match[no].rm_eo - match[no].rm_so;
2b932e6d 543 NEEDSP(len);
be479f2c 544 memmove(dst, string + match[no].rm_so, len);
2b932e6d
KB
545 dst += len;
546 sp->len += len;
547 }
548 }
549 NEEDSP(1);
550 *dst = '\0';
551}
552
553/*
554 * aspace --
555 * Append the source space to the destination space, allocating new
556 * space as necessary.
557 */
be479f2c
KB
558void
559cspace(sp, p, len, spflag)
2b932e6d
KB
560 SPACE *sp;
561 char *p;
562 size_t len;
be479f2c 563 enum e_spflag spflag;
2b932e6d
KB
564{
565 size_t tlen;
2b932e6d 566
eb9bb612
EA
567 /* Make sure SPACE has enough memory and ramp up quickly. */
568 tlen = sp->len + len + 1;
2b932e6d
KB
569 if (tlen > sp->blen) {
570 sp->blen = tlen + 1024;
be479f2c 571 sp->space = sp->back = xrealloc(sp->back, sp->blen);
2b932e6d
KB
572 }
573
eb9bb612 574 if (spflag == REPLACE)
be479f2c 575 sp->len = 0;
2b932e6d 576
be479f2c 577 memmove(sp->space + sp->len, p, len);
662b4df1 578
be479f2c 579 sp->space[sp->len += len] = '\0';
2b932e6d
KB
580}
581
582/*
583 * Close all cached opened files and report any errors
584 */
585void
680fc01f
KB
586cfclose(cp, end)
587 register struct s_command *cp, *end;
2b932e6d
KB
588{
589
680fc01f 590 for (; cp != end; cp = cp->next)
2b932e6d
KB
591 switch(cp->code) {
592 case 's':
593 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
594 err(FATAL,
595 "%s: %s", cp->u.s->wfile, strerror(errno));
296d6245 596 cp->u.s->wfd = -1;
2b932e6d
KB
597 break;
598 case 'w':
599 if (cp->u.fd != -1 && close(cp->u.fd))
600 err(FATAL, "%s: %s", cp->t, strerror(errno));
296d6245 601 cp->u.fd = -1;
2b932e6d
KB
602 break;
603 case '{':
680fc01f 604 cfclose(cp->u.c, cp->next);
2b932e6d
KB
605 break;
606 }
607}