Commit | Line | Data |
---|---|---|
2b932e6d KB |
1 | /*- |
2 | * Copyright (c) 1992 Diomidis Spinellis. | |
3 | * Copyright (c) 1992 The Regents of the University of California. | |
4 | * All rights reserved. | |
5 | * | |
6 | * This code is derived from software contributed to Berkeley by | |
7 | * Diomidis Spinellis of Imperial College, University of London. | |
8 | * | |
9 | * %sccs.include.redist.c% | |
10 | */ | |
11 | ||
12 | #ifndef lint | |
bbb5d6cb | 13 | static char sccsid[] = "@(#)process.c 5.2 (Berkeley) %G%"; |
2b932e6d KB |
14 | #endif /* not lint */ |
15 | ||
16 | #include <sys/types.h> | |
17 | #include <sys/stat.h> | |
18 | #include <sys/ioctl.h> | |
19 | #include <sys/uio.h> | |
20 | ||
21 | #include <ctype.h> | |
22 | #include <errno.h> | |
23 | #include <fcntl.h> | |
24 | #include <limits.h> | |
25 | #include <regex.h> | |
26 | #include <stdio.h> | |
27 | #include <stdlib.h> | |
28 | #include <string.h> | |
29 | #include <unistd.h> | |
30 | ||
31 | #include "defs.h" | |
32 | #include "extern.h" | |
33 | ||
/*
 * A SPACE describes one of the text buffers this file works on.  The text
 * in use is space/len; back/blen describe the memory owned by the SPACE
 * itself.  space may point somewhere other than back for a while (see
 * cspace(), which copies it into back when needed).
 */
typedef struct {
	char *space;		/* Text currently in use. */
	size_t len;		/* Length of that text. */
	int deleted;		/* Non-zero once the space has been deleted. */
	char *back;		/* Memory backing the space. */
	size_t blen;		/* Size of the backing memory. */
} SPACE;
static SPACE HS, PS, SS;

/* Shorthands for the PS and HS fields used throughout this file. */
#define	pd		PS.deleted
#define	ps		PS.space
#define	psl		PS.len
#define	hs		HS.space
#define	hsl		HS.len
47 | ||
48 | static inline int applies __P((struct s_command *)); | |
49 | static void cspace __P((SPACE *, char *, size_t, int)); | |
50 | static void flush_appends __P((void)); | |
51 | static void lputs __P((char *)); | |
52 | static inline int match __P((struct s_addr *)); | |
53 | static int regexec_check __P((regex_t *, const char *, | |
54 | int, regmatch_t[], int)); | |
55 | static void regsub __P((regmatch_t *, char *, char *, SPACE *)); | |
56 | static int substitute __P((struct s_command *)); | |
57 | ||
/* Append requests queued by 'a' and 'r'; emitted by flush_appends(). */
struct s_appends *appends;	/* Array of queued requests. */
static int appendx;		/* Next free slot in appends. */
int appendnum;			/* Number of slots allocated in appends. */

static int lastaddr;		/* Set by applies() on the last line of a range. */
static int sdone;		/* Non-zero if an 's' succeeded since last input. */

/* I/O vector for 'w' output: slot 0 is the text, slot 1 the newline. */
static struct iovec iov[2] = { { NULL, 0 }, { "\n", 1 } };
66 | ||
67 | void | |
68 | process() | |
69 | { | |
70 | struct s_command *cp; | |
71 | SPACE tspace; | |
72 | size_t len; | |
73 | char oldc, *p; | |
74 | ||
75 | for (linenum = 0; ps = mf_fgets(&psl);) { | |
76 | pd = 0; | |
77 | cp = prog; | |
78 | redirect: | |
79 | while (cp != NULL) { | |
80 | if (!applies(cp)) { | |
81 | cp = cp->next; | |
82 | continue; | |
83 | } | |
84 | switch (cp->code) { | |
85 | case '{': | |
86 | cp = cp->u.c; | |
87 | goto redirect; | |
88 | case 'a': | |
89 | if (appendx >= appendnum) | |
90 | appends = xrealloc(appends, | |
91 | sizeof(struct s_appends) * | |
92 | (appendnum *= 2)); | |
93 | appends[appendx].type = AP_STRING; | |
94 | appends[appendx].s = cp->t; | |
95 | appendx++; | |
96 | break; | |
97 | case 'b': | |
98 | cp = cp->u.c; | |
99 | goto redirect; | |
100 | case 'c': | |
101 | pd = 1; | |
102 | psl = 0; | |
103 | if (cp->a2 == NULL || lastaddr) | |
104 | (void)printf("%s", cp->t); | |
105 | break; | |
106 | case 'd': | |
107 | if (pd) | |
108 | goto new; | |
bbb5d6cb | 109 | pd = 1; |
2b932e6d KB |
110 | goto new; |
111 | case 'D': | |
112 | if (pd) | |
113 | goto new; | |
bbb5d6cb KB |
114 | if ((p = strchr(ps, '\n')) == NULL) |
115 | pd = 1; | |
116 | else { | |
2b932e6d KB |
117 | psl -= (p - ps) - 1; |
118 | memmove(ps, p + 1, psl); | |
119 | } | |
120 | goto new; | |
121 | case 'g': | |
122 | ps = hs; | |
123 | psl = hsl; | |
124 | break; | |
125 | case 'G': | |
126 | cspace(&PS, hs, hsl, 1); | |
127 | break; | |
128 | case 'h': | |
129 | cspace(&HS, ps, psl, 0); | |
130 | break; | |
131 | case 'H': | |
132 | cspace(&HS, ps, psl, 1); | |
133 | break; | |
134 | case 'i': | |
135 | (void)printf("%s", cp->t); | |
136 | break; | |
137 | case 'l': | |
138 | lputs(ps); | |
139 | break; | |
140 | case 'n': | |
141 | if (!nflag && !pd) | |
142 | (void)printf("%s\n", ps); | |
143 | flush_appends(); | |
144 | ps = mf_fgets(&psl); | |
145 | #ifdef HISTORIC_PRACTICE | |
146 | if (ps == NULL) | |
147 | exit(0); | |
148 | #endif | |
149 | pd = 0; | |
150 | break; | |
151 | case 'N': | |
152 | flush_appends(); | |
153 | if (ps != PS.back) | |
154 | cspace(&PS, NULL, 0, 0); | |
155 | if ((p = mf_fgets(&len)) == NULL) { | |
156 | if (!nflag && !pd) | |
157 | (void)printf("%s\n", ps); | |
158 | exit(0); | |
159 | } | |
160 | cspace(&PS, p, len, 1); | |
161 | break; | |
162 | case 'p': | |
163 | if (pd) | |
164 | break; | |
165 | (void)printf("%s\n", ps); | |
166 | break; | |
167 | case 'P': | |
168 | if (pd) | |
169 | break; | |
170 | if ((p = strchr(ps, '\n')) != NULL) { | |
171 | oldc = *p; | |
172 | *p = '\0'; | |
173 | } | |
174 | (void)printf("%s\n", ps); | |
175 | if (p != NULL) | |
176 | *p = oldc; | |
177 | break; | |
178 | case 'q': | |
179 | if (!nflag && !pd) | |
180 | (void)printf("%s\n", ps); | |
181 | flush_appends(); | |
182 | exit(0); | |
183 | case 'r': | |
184 | if (appendx >= appendnum) | |
185 | appends = xrealloc(appends, | |
186 | sizeof(struct s_appends) * | |
187 | (appendnum *= 2)); | |
188 | appends[appendx].type = AP_FILE; | |
189 | appends[appendx].s = cp->t; | |
190 | appendx++; | |
191 | break; | |
192 | case 's': | |
193 | sdone = substitute(cp); | |
194 | break; | |
195 | case 't': | |
196 | if (sdone) { | |
197 | sdone = 0; | |
198 | cp = cp->u.c; | |
199 | goto redirect; | |
200 | } | |
201 | break; | |
202 | case 'w': | |
203 | if (pd) | |
204 | break; | |
205 | if (cp->u.fd == -1 && (cp->u.fd = open(cp->t, | |
206 | O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, | |
207 | DEFFILEMODE)) == -1) | |
208 | err(FATAL, "%s: %s\n", | |
209 | cp->t, strerror(errno)); | |
210 | iov[0].iov_base = ps; | |
211 | iov[0].iov_len = psl; | |
212 | if (writev(cp->u.fd, iov, 2) != psl + 1) | |
213 | err(FATAL, "%s: %s\n", | |
214 | cp->t, strerror(errno)); | |
215 | break; | |
216 | case 'x': | |
217 | tspace = PS; | |
218 | PS = HS; | |
219 | HS = tspace; | |
220 | break; | |
221 | case 'y': | |
222 | if (pd) | |
223 | break; | |
224 | for (p = ps, len = psl; len--; ++p) | |
225 | *p = cp->u.y[*p]; | |
226 | break; | |
227 | case ':': | |
228 | case '}': | |
229 | break; | |
230 | case '=': | |
231 | (void)printf("%lu\n", linenum); | |
232 | } | |
233 | cp = cp->next; | |
234 | } /* for all cp */ | |
235 | ||
236 | new: if (!nflag && !pd) | |
237 | (void)printf("%s\n", ps); | |
238 | flush_appends(); | |
239 | } /* for all lines */ | |
240 | } | |
241 | ||
242 | /* | |
243 | * Return TRUE if the command applies to the current line. Sets the inrange | |
244 | * flag to process ranges. Interprets the non-select (``!'') flag. | |
245 | */ | |
246 | static inline int | |
247 | applies(cp) | |
248 | struct s_command *cp; | |
249 | { | |
250 | int r; | |
251 | ||
252 | lastaddr = 0; | |
253 | if (cp->a1 == NULL && cp->a2 == NULL) | |
254 | r = 1; | |
255 | else if (cp->a2) | |
256 | if (cp->inrange) { | |
257 | if (match(cp->a2)) { | |
258 | cp->inrange = 0; | |
259 | lastaddr = 1; | |
260 | } | |
261 | r = 1; | |
262 | } else if (match(cp->a1)) { | |
263 | /* | |
264 | * If the second address is a number less than or | |
265 | * equal to the line number first selected, only | |
266 | * one line shall be selected. | |
267 | * -- POSIX 1003.2 | |
268 | */ | |
269 | if (cp->a2->type == AT_LINE && | |
270 | linenum >= cp->a2->u.l) | |
271 | lastaddr = 1; | |
272 | else | |
273 | cp->inrange = 1; | |
274 | r = 1; | |
275 | } else | |
276 | r = 0; | |
277 | else | |
278 | r = match(cp->a1); | |
279 | return (cp->nonsel ? ! r : r); | |
280 | } | |
281 | ||
282 | /* | |
283 | * Return TRUE if the address passed matches the current program | |
284 | * state (linenumber, ps, lastline) | |
285 | */ | |
286 | static int inline | |
287 | match(a) | |
288 | struct s_addr *a; | |
289 | { | |
290 | int eval; | |
291 | ||
292 | switch (a->type) { | |
293 | case AT_RE: | |
294 | switch (eval = regexec(a->u.r, ps, 0, NULL, 0)) { | |
295 | case 0: | |
296 | return (1); | |
297 | case REG_NOMATCH: | |
298 | return (0); | |
299 | default: | |
300 | err(FATAL, "RE error: %s", strregerror(eval, a->u.r)); | |
301 | } | |
302 | case AT_LINE: | |
303 | return (linenum == a->u.l); | |
304 | case AT_LAST: | |
305 | return (lastline); | |
306 | } | |
307 | /* NOTREACHED */ | |
308 | } | |
309 | ||
310 | /* | |
311 | * substitute -- | |
312 | * Do substitutions in the pattern space. Currently, we build a | |
313 | * copy of the new pattern space in the substitute space structure | |
314 | * and then swap them. | |
315 | */ | |
316 | static int | |
317 | substitute(cp) | |
318 | struct s_command *cp; | |
319 | { | |
320 | SPACE tspace; | |
321 | static regex_t *re; | |
322 | int n, re_off; | |
323 | char *endp, *s; | |
324 | ||
325 | s = ps; | |
326 | re = &cp->u.s->re; | |
327 | if (regexec_check(re, | |
328 | s, re->re_nsub + 1, cp->u.s->pmatch, 0) == REG_NOMATCH) | |
329 | return (0); | |
330 | ||
331 | SS.len = 0; /* Clean substitute space. */ | |
332 | n = cp->u.s->n; | |
333 | switch (n) { | |
334 | case 0: /* Global */ | |
335 | do { | |
336 | /* Locate start of replaced string. */ | |
337 | re_off = cp->u.s->pmatch[0].rm_so; | |
338 | /* Locate end of replaced string + 1. */ | |
339 | endp = s + cp->u.s->pmatch[0].rm_eo; | |
340 | /* Copy leading retained string. */ | |
341 | cspace(&SS, s, re_off, 0); | |
342 | /* Add in regular expression. */ | |
343 | regsub(cp->u.s->pmatch, s, cp->u.s->new, &SS); | |
344 | /* Move past this match. */ | |
345 | s += cp->u.s->pmatch[0].rm_eo; | |
346 | } while(regexec_check(re, s, re->re_nsub + 1, | |
347 | cp->u.s->pmatch, REG_NOTBOL) != REG_NOMATCH); | |
348 | /* Copy trailing retained string. */ | |
349 | cspace(&SS, s, strlen(s), 0); | |
350 | break; | |
351 | default: /* Nth occurrence */ | |
352 | while (--n) { | |
353 | s += cp->u.s->pmatch[0].rm_eo; | |
354 | if (regexec_check(re, s, re->re_nsub + 1, | |
355 | cp->u.s->pmatch, REG_NOTBOL) == REG_NOMATCH) | |
356 | return (0); | |
357 | } | |
358 | /* FALLTHROUGH */ | |
359 | case 1: /* 1st occurrence */ | |
360 | /* Locate start of replaced string. */ | |
361 | re_off = cp->u.s->pmatch[0].rm_so + s - ps; | |
362 | /* Copy leading retained string. */ | |
363 | cspace(&SS, ps, re_off, 0); | |
364 | /* Add in regular expression. */ | |
365 | regsub(cp->u.s->pmatch, s, cp->u.s->new, &SS); | |
366 | /* Copy trailing retained string. */ | |
367 | s += cp->u.s->pmatch[0].rm_eo; | |
368 | cspace(&SS, s, strlen(s), 0); | |
369 | break; | |
370 | } | |
371 | ||
372 | /* | |
373 | * Swap the substitute space and the pattern space, and make sure | |
374 | * that any leftover pointers into stdio memory get lost. | |
375 | */ | |
376 | tspace = PS; | |
377 | PS = SS; | |
378 | SS = tspace; | |
379 | SS.space = SS.back; | |
380 | ||
381 | /* Handle the 'p' flag. */ | |
382 | if (cp->u.s->p) | |
383 | (void)printf("%s\n", ps); | |
384 | ||
385 | /* Handle the 'w' flag. */ | |
386 | if (cp->u.s->wfile && !pd) { | |
387 | if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile, | |
388 | O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) | |
389 | err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno)); | |
390 | iov[0].iov_base = ps; | |
391 | iov[0].iov_len = psl; | |
392 | if (writev(cp->u.s->wfd, iov, 2) != psl + 1) | |
393 | err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno)); | |
394 | } | |
395 | return (1); | |
396 | } | |
397 | ||
398 | /* | |
399 | * Flush append requests. Always called before reading a line, | |
400 | * therefore it also resets the substitution done (sdone) flag. | |
401 | */ | |
402 | static void | |
403 | flush_appends() | |
404 | { | |
405 | FILE *f; | |
406 | int count, i; | |
407 | char buf[8 * 1024]; | |
408 | ||
409 | for (i = 0; i < appendx; i++) | |
410 | switch (appends[i].type) { | |
411 | case AP_STRING: | |
412 | (void)printf("%s", appends[i].s); | |
413 | break; | |
414 | case AP_FILE: | |
415 | /* | |
416 | * Read files probably shouldn't be cached. Since | |
417 | * it's not an error to read a non-existent file, | |
418 | * it's possible that another program is interacting | |
419 | * with the sed script through the file system. It | |
420 | * would be truly bizarre, but possible. It's probably | |
421 | * not that big a performance win, anyhow. | |
422 | */ | |
423 | if ((f = fopen(appends[i].s, "r")) == NULL) | |
424 | break; | |
425 | while (count = fread(buf, 1, sizeof(buf), f)) | |
426 | (void)fwrite(buf, 1, count, stdout); | |
427 | (void)fclose(f); | |
428 | break; | |
429 | } | |
430 | if (ferror(stdout)) | |
431 | err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); | |
432 | appendx = 0; | |
433 | sdone = 0; | |
434 | } | |
435 | ||
436 | static void | |
437 | lputs(s) | |
438 | register char *s; | |
439 | { | |
440 | register int count; | |
441 | register char *escapes, *p; | |
442 | struct winsize win; | |
443 | static int termwidth = -1; | |
444 | ||
445 | if (termwidth == -1) | |
446 | if (p = getenv("COLUMNS")) | |
447 | termwidth = atoi(p); | |
448 | else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 && | |
449 | win.ws_col > 0) | |
450 | termwidth = win.ws_col; | |
451 | else | |
452 | termwidth = 60; | |
453 | ||
454 | for (count = 0; *s; ++s) { | |
455 | if (count >= termwidth) { | |
456 | (void)printf("\\\n"); | |
457 | count = 0; | |
458 | } | |
459 | if (isascii(*s) && isprint(*s) && *s != '\\') { | |
460 | (void)putchar(*s); | |
461 | count++; | |
462 | } else { | |
463 | escapes = "\\\a\b\f\n\r\t\v"; | |
464 | (void)putchar('\\'); | |
465 | if (p = strchr(escapes, *s)) { | |
466 | (void)putchar("\\abfnrtv"[p - escapes]); | |
467 | count += 2; | |
468 | } else { | |
469 | (void)printf("%03o", (u_char)*s); | |
470 | count += 4; | |
471 | } | |
472 | } | |
473 | } | |
474 | (void)putchar('$'); | |
475 | (void)putchar('\n'); | |
476 | if (ferror(stdout)) | |
477 | err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); | |
478 | } | |
479 | ||
480 | /* | |
481 | * Regexec with checking for errors | |
482 | */ | |
483 | static int | |
484 | regexec_check(preg, string, nmatch, pmatch, eflags) | |
485 | regex_t *preg; | |
486 | const char *string; | |
487 | int nmatch; | |
488 | regmatch_t pmatch[]; | |
489 | int eflags; | |
490 | { | |
491 | int eval; | |
492 | ||
493 | switch (eval = regexec(preg, string, nmatch, pmatch, eflags)) { | |
494 | case 0: | |
495 | return (0); | |
496 | case REG_NOMATCH: | |
497 | return (REG_NOMATCH); | |
498 | default: | |
499 | err(FATAL, "RE error: %s", strregerror(eval, preg)); | |
500 | } | |
501 | /* NOTREACHED */ | |
502 | } | |
503 | ||
504 | /* | |
505 | * regsub - perform substitutions after a regexp match | |
506 | * Based on a routine by Henry Spencer | |
507 | */ | |
508 | static void | |
509 | regsub(pmatch, string, src, sp) | |
510 | regmatch_t *pmatch; | |
511 | char *string, *src; | |
512 | SPACE *sp; | |
513 | { | |
514 | register int len, no; | |
515 | register char c, *dst; | |
516 | ||
517 | #define NEEDSP(reqlen) \ | |
518 | if (sp->len >= sp->blen - (reqlen) - 1) { \ | |
519 | sp->blen += (reqlen) + 1024; \ | |
520 | sp->space = sp->back = xrealloc(sp->back, sp->blen); \ | |
521 | dst = sp->space + sp->len; \ | |
522 | } | |
523 | ||
524 | dst = sp->space + sp->len; | |
525 | while ((c = *src++) != '\0') { | |
526 | if (c == '&') | |
527 | no = 0; | |
528 | else if (c == '\\' && isdigit(*src)) | |
529 | no = *src++ - '0'; | |
530 | else | |
531 | no = -1; | |
532 | if (no < 0) { /* Ordinary character. */ | |
533 | if (c == '\\' && (*src == '\\' || *src == '&')) | |
534 | c = *src++; | |
535 | NEEDSP(1); | |
536 | *dst++ = c; | |
537 | ++sp->len; | |
538 | } else if (pmatch[no].rm_so != -1 && pmatch[no].rm_eo != -1) { | |
539 | len = pmatch[no].rm_eo - pmatch[no].rm_so; | |
540 | NEEDSP(len); | |
541 | memmove(dst, string + pmatch[no].rm_so, len); | |
542 | dst += len; | |
543 | sp->len += len; | |
544 | } | |
545 | } | |
546 | NEEDSP(1); | |
547 | *dst = '\0'; | |
548 | } | |
549 | ||
550 | /* | |
551 | * aspace -- | |
552 | * Append the source space to the destination space, allocating new | |
553 | * space as necessary. | |
554 | */ | |
555 | static void | |
556 | cspace(sp, p, len, append) | |
557 | SPACE *sp; | |
558 | char *p; | |
559 | size_t len; | |
560 | int append; | |
561 | { | |
562 | size_t tlen; | |
563 | int needcopy; | |
564 | ||
565 | /* Current pointer may point to something else at the moment. */ | |
566 | needcopy = sp->space != sp->back; | |
567 | ||
568 | /* | |
569 | * Make sure SPACE has enough memory and ramp up quickly. | |
570 | * Add in two extra bytes, one for the newline, one for a | |
571 | * terminating NULL. | |
572 | */ | |
573 | tlen = sp->len + len + 2; | |
574 | if (tlen > sp->blen) { | |
575 | sp->blen = tlen + 1024; | |
576 | sp->back = xrealloc(sp->back, sp->blen); | |
577 | } | |
578 | ||
579 | if (needcopy) | |
580 | memmove(sp->back, sp->space, sp->len + 1); | |
581 | sp->space = sp->back; | |
582 | ||
583 | /* May just be copying out of a stdio buffer. */ | |
584 | if (len == NULL) | |
585 | return; | |
586 | ||
587 | /* Append a separating newline. */ | |
588 | if (append) | |
589 | sp->space[sp->len++] = '\n'; | |
590 | ||
591 | /* Append the new stuff, plus its terminating NULL. */ | |
592 | memmove(sp->space + sp->len, p, len + 1); | |
593 | sp->len += len; | |
594 | } | |
595 | ||
596 | /* | |
597 | * Close all cached opened files and report any errors | |
598 | */ | |
599 | void | |
600 | cfclose(cp) | |
601 | register struct s_command *cp; | |
602 | { | |
603 | ||
604 | for (; cp != NULL; cp = cp->next) | |
605 | switch(cp->code) { | |
606 | case 's': | |
607 | if (cp->u.s->wfd != -1 && close(cp->u.s->wfd)) | |
608 | err(FATAL, | |
609 | "%s: %s", cp->u.s->wfile, strerror(errno)); | |
610 | break; | |
611 | case 'w': | |
612 | if (cp->u.fd != -1 && close(cp->u.fd)) | |
613 | err(FATAL, "%s: %s", cp->t, strerror(errno)); | |
614 | break; | |
615 | case '{': | |
616 | cfclose(cp->u.c); | |
617 | break; | |
618 | } | |
619 | } |