Commit | Line | Data |
---|---|---|
c2199a45 AM |
1 | /*- |
2 | * Copyright (c) 1992 Diomidis Spinellis. | |
3 | * Copyright (c) 1992 The Regents of the University of California. | |
4 | * All rights reserved. | |
5 | * | |
6 | * This code is derived from software contributed to Berkeley by | |
7 | * Diomidis Spinellis of Imperial College, University of London. | |
8 | * | |
9 | * Redistribution and use in source and binary forms, with or without | |
10 | * modification, are permitted provided that the following conditions | |
11 | * are met: | |
12 | * 1. Redistributions of source code must retain the above copyright | |
13 | * notice, this list of conditions and the following disclaimer. | |
14 | * 2. Redistributions in binary form must reproduce the above copyright | |
15 | * notice, this list of conditions and the following disclaimer in the | |
16 | * documentation and/or other materials provided with the distribution. | |
17 | * 3. All advertising materials mentioning features or use of this software | |
18 | * must display the following acknowledgement: | |
19 | * This product includes software developed by the University of | |
20 | * California, Berkeley and its contributors. | |
21 | * 4. Neither the name of the University nor the names of its contributors | |
22 | * may be used to endorse or promote products derived from this software | |
23 | * without specific prior written permission. | |
24 | * | |
25 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
26 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
27 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
28 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
29 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
30 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
31 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
32 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
33 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
34 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
35 | * SUCH DAMAGE. | |
36 | */ | |
37 | ||
38 | #ifndef lint | |
39 | static char sccsid[] = "@(#)process.c 5.10 (Berkeley) 12/2/92"; | |
40 | #endif /* not lint */ | |
41 | ||
42 | #include <sys/types.h> | |
43 | #include <sys/stat.h> | |
44 | #include <sys/ioctl.h> | |
45 | #include <sys/uio.h> | |
46 | ||
47 | #include <ctype.h> | |
48 | #include <errno.h> | |
49 | #include <fcntl.h> | |
50 | #include <limits.h> | |
51 | #include <regex.h> | |
52 | #include <stdio.h> | |
53 | #include <stdlib.h> | |
54 | #include <string.h> | |
55 | #include <unistd.h> | |
56 | ||
57 | #include "defs.h" | |
58 | #include "extern.h" | |
59 | ||
/*
 * Working buffers used by the command interpreter in this file:
 *   HS - the buffer copied to and from by the g/G/h/H/x commands,
 *   PS - the buffer each input line is read into (the pattern space),
 *   SS - scratch buffer in which substitute() builds its result.
 * The macros below are shorthand for the fields touched most often.
 */
static SPACE HS = {""}, PS, SS;
#define pd PS.deleted
#define ps PS.space
#define psl PS.len
#define hs HS.space
#define hsl HS.len
66 | ||
/*
 * Prototypes for the helpers private to this file.
 * NOTE(review): __P() is defined outside this file; presumably it hides
 * the parameter lists from pre-ANSI compilers -- confirm.
 */
static inline int applies __P((struct s_command *));
static void flush_appends __P((void));
static void lputs __P((char *));
static inline int regexec_e __P((regex_t *, const char *, int, int));
static void regsub __P((SPACE *, char *, char *));
static int substitute __P((struct s_command *));
73 | ||
/*
 * Output queued by the 'a' and 'r' commands; flush_appends() emits the
 * first appendx entries and then resets appendx.
 */
struct s_appends *appends; /* Array of queued append requests (type + string). */
static int appendx; /* Index into appends array. */
int appendnum; /* Size of appends array. */

static int lastaddr; /* Set by applies if last address of a range. */
static int sdone; /* If any substitutes since last line input. */
/*
 * Iov structure for 'w' commands: slot 0 is pointed at the pattern space
 * just before each writev(), slot 1 is the trailing newline.
 */
static struct iovec iov[2] = { NULL, 0, "\n", 1 };

/* Most recent non-NULL regex given to regexec_e(); reused when it gets NULL. */
static regex_t *defpreg;
/* regexec_e() asks regexec() for maxnsub + 1 match slots; set outside this file. */
size_t maxnsub;
/* Match offsets filled in by regexec(); storage is provided outside this file. */
regmatch_t *match;
86 | ||
87 | void | |
88 | process() | |
89 | { | |
90 | struct s_command *cp; | |
91 | SPACE tspace; | |
92 | size_t len; | |
93 | int r; | |
94 | char oldc, *p; | |
95 | ||
96 | for (linenum = 0; mf_fgets(&PS, REPLACE);) { | |
97 | pd = 0; | |
98 | cp = prog; | |
99 | redirect: | |
100 | while (cp != NULL) { | |
101 | if (!applies(cp)) { | |
102 | cp = cp->next; | |
103 | continue; | |
104 | } | |
105 | switch (cp->code) { | |
106 | case '{': | |
107 | cp = cp->u.c; | |
108 | goto redirect; | |
109 | case 'a': | |
110 | if (appendx >= appendnum) | |
111 | appends = xrealloc(appends, | |
112 | sizeof(struct s_appends) * | |
113 | (appendnum *= 2)); | |
114 | appends[appendx].type = AP_STRING; | |
115 | appends[appendx].s = cp->t; | |
116 | appendx++; | |
117 | break; | |
118 | case 'b': | |
119 | cp = cp->u.c; | |
120 | goto redirect; | |
121 | case 'c': | |
122 | pd = 1; | |
123 | psl = 0; | |
124 | if (cp->a2 == NULL || lastaddr) | |
125 | (void)printf("%s", cp->t); | |
126 | break; | |
127 | case 'd': | |
128 | pd = 1; | |
129 | goto new; | |
130 | case 'D': | |
131 | if (pd) | |
132 | goto new; | |
133 | if ((p = strchr(ps, '\n')) == NULL) | |
134 | pd = 1; | |
135 | else { | |
136 | psl -= (p - ps) - 1; | |
137 | memmove(ps, p + 1, psl); | |
138 | } | |
139 | goto new; | |
140 | case 'g': | |
141 | cspace(&PS, hs, hsl, REPLACE); | |
142 | break; | |
143 | case 'G': | |
144 | cspace(&PS, hs, hsl, APPENDNL); | |
145 | break; | |
146 | case 'h': | |
147 | cspace(&HS, ps, psl, REPLACE); | |
148 | break; | |
149 | case 'H': | |
150 | cspace(&HS, ps, psl, APPENDNL); | |
151 | break; | |
152 | case 'i': | |
153 | (void)printf("%s", cp->t); | |
154 | break; | |
155 | case 'l': | |
156 | lputs(ps); | |
157 | break; | |
158 | case 'n': | |
159 | if (!nflag && !pd) | |
160 | (void)printf("%s\n", ps); | |
161 | flush_appends(); | |
162 | r = mf_fgets(&PS, REPLACE); | |
163 | #ifdef HISTORIC_PRACTICE | |
164 | if (!r) | |
165 | exit(0); | |
166 | #endif | |
167 | pd = 0; | |
168 | break; | |
169 | case 'N': | |
170 | flush_appends(); | |
171 | if (!mf_fgets(&PS, APPENDNL)) { | |
172 | if (!nflag && !pd) | |
173 | (void)printf("%s\n", ps); | |
174 | exit(0); | |
175 | } | |
176 | break; | |
177 | case 'p': | |
178 | if (pd) | |
179 | break; | |
180 | (void)printf("%s\n", ps); | |
181 | break; | |
182 | case 'P': | |
183 | if (pd) | |
184 | break; | |
185 | if ((p = strchr(ps, '\n')) != NULL) { | |
186 | oldc = *p; | |
187 | *p = '\0'; | |
188 | } | |
189 | (void)printf("%s\n", ps); | |
190 | if (p != NULL) | |
191 | *p = oldc; | |
192 | break; | |
193 | case 'q': | |
194 | if (!nflag && !pd) | |
195 | (void)printf("%s\n", ps); | |
196 | flush_appends(); | |
197 | exit(0); | |
198 | case 'r': | |
199 | if (appendx >= appendnum) | |
200 | appends = xrealloc(appends, | |
201 | sizeof(struct s_appends) * | |
202 | (appendnum *= 2)); | |
203 | appends[appendx].type = AP_FILE; | |
204 | appends[appendx].s = cp->t; | |
205 | appendx++; | |
206 | break; | |
207 | case 's': | |
208 | sdone |= substitute(cp); | |
209 | break; | |
210 | case 't': | |
211 | if (sdone) { | |
212 | sdone = 0; | |
213 | cp = cp->u.c; | |
214 | goto redirect; | |
215 | } | |
216 | break; | |
217 | case 'w': | |
218 | if (pd) | |
219 | break; | |
220 | if (cp->u.fd == -1 && (cp->u.fd = open(cp->t, | |
221 | O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, | |
222 | DEFFILEMODE)) == -1) | |
223 | err(FATAL, "%s: %s\n", | |
224 | cp->t, strerror(errno)); | |
225 | iov[0].iov_base = ps; | |
226 | iov[0].iov_len = psl; | |
227 | if (writev(cp->u.fd, iov, 2) != psl + 1) | |
228 | err(FATAL, "%s: %s\n", | |
229 | cp->t, strerror(errno)); | |
230 | break; | |
231 | case 'x': | |
232 | tspace = PS; | |
233 | PS = HS; | |
234 | HS = tspace; | |
235 | break; | |
236 | case 'y': | |
237 | if (pd) | |
238 | break; | |
239 | for (p = ps, len = psl; len--; ++p) | |
240 | *p = cp->u.y[*p]; | |
241 | break; | |
242 | case ':': | |
243 | case '}': | |
244 | break; | |
245 | case '=': | |
246 | (void)printf("%lu\n", linenum); | |
247 | } | |
248 | cp = cp->next; | |
249 | } /* for all cp */ | |
250 | ||
251 | new: if (!nflag && !pd) | |
252 | (void)printf("%s\n", ps); | |
253 | flush_appends(); | |
254 | } /* for all lines */ | |
255 | } | |
256 | ||
/*
 * TRUE if the address passed matches the current program state
 * (lastline, linenum, ps).  A regular-expression address is run against
 * the pattern space, a line-number address is compared with linenum, and
 * any other address type yields lastline.
 *
 * The whole expansion is parenthesized so the macro can be used safely as
 * an operand (e.g. under `!' or `&&').
 */
#define	MATCH(a)							\
	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1) :		\
	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline)
264 | ||
265 | /* | |
266 | * Return TRUE if the command applies to the current line. Sets the inrange | |
267 | * flag to process ranges. Interprets the non-select (``!'') flag. | |
268 | */ | |
269 | static inline int | |
270 | applies(cp) | |
271 | struct s_command *cp; | |
272 | { | |
273 | int r; | |
274 | ||
275 | lastaddr = 0; | |
276 | if (cp->a1 == NULL && cp->a2 == NULL) | |
277 | r = 1; | |
278 | else if (cp->a2) | |
279 | if (cp->inrange) { | |
280 | if (MATCH(cp->a2)) { | |
281 | cp->inrange = 0; | |
282 | lastaddr = 1; | |
283 | } | |
284 | r = 1; | |
285 | } else if (MATCH(cp->a1)) { | |
286 | /* | |
287 | * If the second address is a number less than or | |
288 | * equal to the line number first selected, only | |
289 | * one line shall be selected. | |
290 | * -- POSIX 1003.2 | |
291 | */ | |
292 | if (cp->a2->type == AT_LINE && | |
293 | linenum >= cp->a2->u.l) | |
294 | lastaddr = 1; | |
295 | else | |
296 | cp->inrange = 1; | |
297 | r = 1; | |
298 | } else | |
299 | r = 0; | |
300 | else | |
301 | r = MATCH(cp->a1); | |
302 | return (cp->nonsel ? ! r : r); | |
303 | } | |
304 | ||
305 | /* | |
306 | * substitute -- | |
307 | * Do substitutions in the pattern space. Currently, we build a | |
308 | * copy of the new pattern space in the substitute space structure | |
309 | * and then swap them. | |
310 | */ | |
311 | static int | |
312 | substitute(cp) | |
313 | struct s_command *cp; | |
314 | { | |
315 | SPACE tspace; | |
316 | regex_t *re; | |
317 | size_t re_off; | |
5e9460af | 318 | size_t re_eoff; |
c2199a45 AM |
319 | int n; |
320 | char *s; | |
9040bcef | 321 | char *eos; |
c2199a45 AM |
322 | |
323 | s = ps; | |
9040bcef | 324 | eos = s + strlen(s); |
c2199a45 AM |
325 | re = cp->u.s->re; |
326 | if (re == NULL) { | |
327 | if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) { | |
328 | linenum = cp->u.s->linenum; | |
329 | err(COMPILE, "\\%d not defined in the RE", | |
330 | cp->u.s->maxbref); | |
331 | } | |
332 | } | |
333 | if (!regexec_e(re, s, 0, 0)) | |
334 | return (0); | |
335 | ||
336 | SS.len = 0; /* Clean substitute space. */ | |
337 | n = cp->u.s->n; | |
338 | switch (n) { | |
339 | case 0: /* Global */ | |
340 | do { | |
341 | /* Locate start of replaced string. */ | |
342 | re_off = match[0].rm_so; | |
5e9460af | 343 | re_eoff = match[0].rm_eo; |
c2199a45 AM |
344 | /* Copy leading retained string. */ |
345 | cspace(&SS, s, re_off, APPEND); | |
346 | /* Add in regular expression. */ | |
347 | regsub(&SS, s, cp->u.s->new); | |
348 | /* Move past this match. */ | |
349 | s += match[0].rm_eo; | |
5e9460af AM |
350 | } while(*s && re_eoff && regexec_e(re, s, REG_NOTBOL, 0)); |
351 | if (eos - s > 0 && !re_eoff) | |
9040bcef | 352 | err(FATAL, "infinite substitution loop"); |
c2199a45 AM |
353 | /* Copy trailing retained string. */ |
354 | cspace(&SS, s, strlen(s), APPEND); | |
355 | break; | |
356 | default: /* Nth occurrence */ | |
357 | while (--n) { | |
358 | s += match[0].rm_eo; | |
359 | if (!regexec_e(re, s, REG_NOTBOL, 0)) | |
360 | return (0); | |
361 | } | |
362 | /* FALLTHROUGH */ | |
363 | case 1: /* 1st occurrence */ | |
364 | /* Locate start of replaced string. */ | |
365 | re_off = match[0].rm_so + (s - ps); | |
366 | /* Copy leading retained string. */ | |
367 | cspace(&SS, ps, re_off, APPEND); | |
368 | /* Add in regular expression. */ | |
369 | regsub(&SS, s, cp->u.s->new); | |
370 | /* Copy trailing retained string. */ | |
371 | s += match[0].rm_eo; | |
372 | cspace(&SS, s, strlen(s), APPEND); | |
373 | break; | |
374 | } | |
375 | ||
376 | /* | |
377 | * Swap the substitute space and the pattern space, and make sure | |
378 | * that any leftover pointers into stdio memory get lost. | |
379 | */ | |
380 | tspace = PS; | |
381 | PS = SS; | |
382 | SS = tspace; | |
383 | SS.space = SS.back; | |
384 | ||
385 | /* Handle the 'p' flag. */ | |
386 | if (cp->u.s->p) | |
387 | (void)printf("%s\n", ps); | |
388 | ||
389 | /* Handle the 'w' flag. */ | |
390 | if (cp->u.s->wfile && !pd) { | |
391 | if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile, | |
392 | O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) | |
393 | err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno)); | |
394 | iov[0].iov_base = ps; | |
395 | iov[0].iov_len = psl; | |
396 | if (writev(cp->u.s->wfd, iov, 2) != psl + 1) | |
397 | err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno)); | |
398 | } | |
399 | return (1); | |
400 | } | |
401 | ||
402 | /* | |
403 | * Flush append requests. Always called before reading a line, | |
404 | * therefore it also resets the substitution done (sdone) flag. | |
405 | */ | |
406 | static void | |
407 | flush_appends() | |
408 | { | |
409 | FILE *f; | |
410 | int count, i; | |
411 | char buf[8 * 1024]; | |
412 | ||
413 | for (i = 0; i < appendx; i++) | |
414 | switch (appends[i].type) { | |
415 | case AP_STRING: | |
416 | (void)printf("%s", appends[i].s); | |
417 | break; | |
418 | case AP_FILE: | |
419 | /* | |
420 | * Read files probably shouldn't be cached. Since | |
421 | * it's not an error to read a non-existent file, | |
422 | * it's possible that another program is interacting | |
423 | * with the sed script through the file system. It | |
424 | * would be truly bizarre, but possible. It's probably | |
425 | * not that big a performance win, anyhow. | |
426 | */ | |
427 | if ((f = fopen(appends[i].s, "r")) == NULL) | |
428 | break; | |
429 | while (count = fread(buf, 1, sizeof(buf), f)) | |
430 | (void)fwrite(buf, 1, count, stdout); | |
431 | (void)fclose(f); | |
432 | break; | |
433 | } | |
434 | if (ferror(stdout)) | |
435 | err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); | |
436 | appendx = sdone = 0; | |
437 | } | |
438 | ||
439 | static void | |
440 | lputs(s) | |
441 | register char *s; | |
442 | { | |
443 | register int count; | |
444 | register char *escapes, *p; | |
445 | struct winsize win; | |
446 | static int termwidth = -1; | |
447 | ||
448 | if (termwidth == -1) | |
449 | if (p = getenv("COLUMNS")) | |
450 | termwidth = atoi(p); | |
451 | else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 && | |
452 | win.ws_col > 0) | |
453 | termwidth = win.ws_col; | |
454 | else | |
455 | termwidth = 60; | |
456 | ||
457 | for (count = 0; *s; ++s) { | |
458 | if (count >= termwidth) { | |
459 | (void)printf("\\\n"); | |
460 | count = 0; | |
461 | } | |
462 | if (isascii(*s) && isprint(*s) && *s != '\\') { | |
463 | (void)putchar(*s); | |
464 | count++; | |
465 | } else { | |
466 | escapes = "\\\a\b\f\n\r\t\v"; | |
467 | (void)putchar('\\'); | |
468 | if (p = strchr(escapes, *s)) { | |
469 | (void)putchar("\\abfnrtv"[p - escapes]); | |
470 | count += 2; | |
471 | } else { | |
472 | (void)printf("%03o", (u_char)*s); | |
473 | count += 4; | |
474 | } | |
475 | } | |
476 | } | |
477 | (void)putchar('$'); | |
478 | (void)putchar('\n'); | |
479 | if (ferror(stdout)) | |
480 | err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); | |
481 | } | |
482 | ||
483 | static inline int | |
484 | regexec_e(preg, string, eflags, nomatch) | |
485 | regex_t *preg; | |
486 | const char *string; | |
487 | int eflags, nomatch; | |
488 | { | |
489 | int eval; | |
490 | ||
491 | if (preg == NULL) { | |
492 | if (defpreg == NULL) | |
493 | err(FATAL, "first RE may not be empty"); | |
494 | } else | |
495 | defpreg = preg; | |
496 | ||
497 | eval = regexec(defpreg, string, | |
498 | nomatch ? 0 : maxnsub + 1, match, eflags); | |
499 | switch(eval) { | |
500 | case 0: | |
501 | return (1); | |
502 | case REG_NOMATCH: | |
503 | return (0); | |
504 | } | |
505 | err(FATAL, "RE error: %s", strregerror(eval, defpreg)); | |
506 | /* NOTREACHED */ | |
507 | } | |
508 | ||
509 | /* | |
510 | * regsub - perform substitutions after a regexp match | |
511 | * Based on a routine by Henry Spencer | |
512 | */ | |
513 | static void | |
514 | regsub(sp, string, src) | |
515 | SPACE *sp; | |
516 | char *string, *src; | |
517 | { | |
518 | register int len, no; | |
519 | register char c, *dst; | |
520 | ||
521 | #define NEEDSP(reqlen) \ | |
522 | if (sp->len >= sp->blen - (reqlen) - 1) { \ | |
523 | sp->blen += (reqlen) + 1024; \ | |
524 | sp->space = sp->back = xrealloc(sp->back, sp->blen); \ | |
525 | dst = sp->space + sp->len; \ | |
526 | } | |
527 | ||
528 | dst = sp->space + sp->len; | |
529 | while ((c = *src++) != '\0') { | |
530 | if (c == '&') | |
531 | no = 0; | |
532 | else if (c == '\\' && isdigit(*src)) | |
533 | no = *src++ - '0'; | |
534 | else | |
535 | no = -1; | |
536 | if (no < 0) { /* Ordinary character. */ | |
537 | if (c == '\\' && (*src == '\\' || *src == '&')) | |
538 | c = *src++; | |
539 | NEEDSP(1); | |
540 | *dst++ = c; | |
541 | ++sp->len; | |
542 | } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) { | |
543 | len = match[no].rm_eo - match[no].rm_so; | |
544 | NEEDSP(len); | |
545 | memmove(dst, string + match[no].rm_so, len); | |
546 | dst += len; | |
547 | sp->len += len; | |
548 | } | |
549 | } | |
550 | NEEDSP(1); | |
551 | *dst = '\0'; | |
552 | } | |
553 | ||
554 | /* | |
555 | * aspace -- | |
556 | * Append the source space to the destination space, allocating new | |
557 | * space as necessary. | |
558 | */ | |
559 | void | |
560 | cspace(sp, p, len, spflag) | |
561 | SPACE *sp; | |
562 | char *p; | |
563 | size_t len; | |
564 | enum e_spflag spflag; | |
565 | { | |
566 | size_t tlen; | |
567 | ||
568 | /* | |
569 | * Make sure SPACE has enough memory and ramp up quickly. Appends | |
570 | * need two extra bytes, one for the newline, one for a terminating | |
571 | * NULL. | |
572 | */ | |
fd1ae05e AM |
573 | /* tlen = sp->len + len + spflag == APPENDNL ? 2 : 1; */ |
574 | tlen = sp->len + len + (spflag == APPENDNL ? 2 : 1); /* XXX */ | |
c2199a45 AM |
575 | if (tlen > sp->blen) { |
576 | sp->blen = tlen + 1024; | |
577 | sp->space = sp->back = xrealloc(sp->back, sp->blen); | |
578 | } | |
579 | ||
580 | if (spflag == APPENDNL) | |
581 | sp->space[sp->len++] = '\n'; | |
582 | else if (spflag == REPLACE) | |
583 | sp->len = 0; | |
584 | ||
585 | memmove(sp->space + sp->len, p, len); | |
586 | sp->space[sp->len += len] = '\0'; | |
587 | } | |
588 | ||
589 | /* | |
590 | * Close all cached opened files and report any errors | |
591 | */ | |
592 | void | |
593 | cfclose(cp, end) | |
594 | register struct s_command *cp, *end; | |
595 | { | |
596 | ||
597 | for (; cp != end; cp = cp->next) | |
598 | switch(cp->code) { | |
599 | case 's': | |
600 | if (cp->u.s->wfd != -1 && close(cp->u.s->wfd)) | |
601 | err(FATAL, | |
602 | "%s: %s", cp->u.s->wfile, strerror(errno)); | |
603 | cp->u.s->wfd = -1; | |
604 | break; | |
605 | case 'w': | |
606 | if (cp->u.fd != -1 && close(cp->u.fd)) | |
607 | err(FATAL, "%s: %s", cp->t, strerror(errno)); | |
608 | cp->u.fd = -1; | |
609 | break; | |
610 | case '{': | |
611 | cfclose(cp->u.c, cp->next); | |
612 | break; | |
613 | } | |
614 | } |