| 1 | /*- |
| 2 | * Copyright (c) 1992 Diomidis Spinellis. |
| 3 | * Copyright (c) 1992 The Regents of the University of California. |
| 4 | * All rights reserved. |
| 5 | * |
| 6 | * This code is derived from software contributed to Berkeley by |
| 7 | * Diomidis Spinellis of Imperial College, University of London. |
| 8 | * |
| 9 | * %sccs.include.redist.c% |
| 10 | */ |
| 11 | |
| 12 | #ifndef lint |
| 13 | static char sccsid[] = "@(#)process.c 5.2 (Berkeley) %G%"; |
| 14 | #endif /* not lint */ |
| 15 | |
| 16 | #include <sys/types.h> |
| 17 | #include <sys/stat.h> |
| 18 | #include <sys/ioctl.h> |
| 19 | #include <sys/uio.h> |
| 20 | |
| 21 | #include <ctype.h> |
| 22 | #include <errno.h> |
| 23 | #include <fcntl.h> |
| 24 | #include <limits.h> |
| 25 | #include <regex.h> |
| 26 | #include <stdio.h> |
| 27 | #include <stdlib.h> |
| 28 | #include <string.h> |
| 29 | #include <unistd.h> |
| 30 | |
| 31 | #include "defs.h" |
| 32 | #include "extern.h" |
| 33 | |
/*
 * A growable text buffer.  "space" is the text currently in use and "len"
 * its length; "back" is the memory owned by the structure (grown with
 * xrealloc) and "blen" its size.  "space" does not always equal "back":
 * the pattern space can point straight at the line returned by mf_fgets,
 * in which case cspace() copies it into "back" before modifying it.
 */
typedef struct {
	char *space;		/* Current space pointer. */
	size_t len;		/* Current length. */
	int deleted;		/* If deleted. */
	char *back;		/* Backing memory. */
	size_t blen;		/* Backing memory length. */
} SPACE;
/* HS: hold space, PS: pattern space, SS: scratch space used by substitute(). */
static SPACE HS, PS, SS;
/* Shorthand accessors for the pattern space (p*) and hold space (h*). */
#define	pd		PS.deleted
#define	ps		PS.space
#define	psl		PS.len
#define	hs		HS.space
#define	hsl		HS.len
/* Prototypes for the file-local helpers defined below. */
static inline int	 applies __P((struct s_command *));
static void		 cspace __P((SPACE *, char *, size_t, int));
static void		 flush_appends __P((void));
static void		 lputs __P((char *));
static inline int	 match __P((struct s_addr *));
static int		 regexec_check __P((regex_t *, const char *,
			    int, regmatch_t[], int));
static void		 regsub __P((regmatch_t *, char *, char *, SPACE *));
static int		 substitute __P((struct s_command *));

/*
 * Pending append requests queued by the 'a' and 'r' commands; they are
 * written out and the queue is emptied by flush_appends().
 */
struct s_appends *appends;	/* Array of pointers to strings to append. */
static int appendx;		/* Index into appends array. */
int appendnum;			/* Size of appends array. */

static int lastaddr;		/* Set by applies if last address of a range. */
static int sdone;		/* If any substitutes since last line input. */
				/* Iov structure for 'w' commands. */
/* iov[0] is filled in with the pattern space before each writev(); iov[1] is the newline. */
static struct iovec iov[2] = { NULL, 0, "\n", 1 };
| 66 | |
| 67 | void |
| 68 | process() |
| 69 | { |
| 70 | struct s_command *cp; |
| 71 | SPACE tspace; |
| 72 | size_t len; |
| 73 | char oldc, *p; |
| 74 | |
| 75 | for (linenum = 0; ps = mf_fgets(&psl);) { |
| 76 | pd = 0; |
| 77 | cp = prog; |
| 78 | redirect: |
| 79 | while (cp != NULL) { |
| 80 | if (!applies(cp)) { |
| 81 | cp = cp->next; |
| 82 | continue; |
| 83 | } |
| 84 | switch (cp->code) { |
| 85 | case '{': |
| 86 | cp = cp->u.c; |
| 87 | goto redirect; |
| 88 | case 'a': |
| 89 | if (appendx >= appendnum) |
| 90 | appends = xrealloc(appends, |
| 91 | sizeof(struct s_appends) * |
| 92 | (appendnum *= 2)); |
| 93 | appends[appendx].type = AP_STRING; |
| 94 | appends[appendx].s = cp->t; |
| 95 | appendx++; |
| 96 | break; |
| 97 | case 'b': |
| 98 | cp = cp->u.c; |
| 99 | goto redirect; |
| 100 | case 'c': |
| 101 | pd = 1; |
| 102 | psl = 0; |
| 103 | if (cp->a2 == NULL || lastaddr) |
| 104 | (void)printf("%s", cp->t); |
| 105 | break; |
| 106 | case 'd': |
| 107 | if (pd) |
| 108 | goto new; |
| 109 | pd = 1; |
| 110 | goto new; |
| 111 | case 'D': |
| 112 | if (pd) |
| 113 | goto new; |
| 114 | if ((p = strchr(ps, '\n')) == NULL) |
| 115 | pd = 1; |
| 116 | else { |
| 117 | psl -= (p - ps) - 1; |
| 118 | memmove(ps, p + 1, psl); |
| 119 | } |
| 120 | goto new; |
| 121 | case 'g': |
| 122 | ps = hs; |
| 123 | psl = hsl; |
| 124 | break; |
| 125 | case 'G': |
| 126 | cspace(&PS, hs, hsl, 1); |
| 127 | break; |
| 128 | case 'h': |
| 129 | cspace(&HS, ps, psl, 0); |
| 130 | break; |
| 131 | case 'H': |
| 132 | cspace(&HS, ps, psl, 1); |
| 133 | break; |
| 134 | case 'i': |
| 135 | (void)printf("%s", cp->t); |
| 136 | break; |
| 137 | case 'l': |
| 138 | lputs(ps); |
| 139 | break; |
| 140 | case 'n': |
| 141 | if (!nflag && !pd) |
| 142 | (void)printf("%s\n", ps); |
| 143 | flush_appends(); |
| 144 | ps = mf_fgets(&psl); |
| 145 | #ifdef HISTORIC_PRACTICE |
| 146 | if (ps == NULL) |
| 147 | exit(0); |
| 148 | #endif |
| 149 | pd = 0; |
| 150 | break; |
| 151 | case 'N': |
| 152 | flush_appends(); |
| 153 | if (ps != PS.back) |
| 154 | cspace(&PS, NULL, 0, 0); |
| 155 | if ((p = mf_fgets(&len)) == NULL) { |
| 156 | if (!nflag && !pd) |
| 157 | (void)printf("%s\n", ps); |
| 158 | exit(0); |
| 159 | } |
| 160 | cspace(&PS, p, len, 1); |
| 161 | break; |
| 162 | case 'p': |
| 163 | if (pd) |
| 164 | break; |
| 165 | (void)printf("%s\n", ps); |
| 166 | break; |
| 167 | case 'P': |
| 168 | if (pd) |
| 169 | break; |
| 170 | if ((p = strchr(ps, '\n')) != NULL) { |
| 171 | oldc = *p; |
| 172 | *p = '\0'; |
| 173 | } |
| 174 | (void)printf("%s\n", ps); |
| 175 | if (p != NULL) |
| 176 | *p = oldc; |
| 177 | break; |
| 178 | case 'q': |
| 179 | if (!nflag && !pd) |
| 180 | (void)printf("%s\n", ps); |
| 181 | flush_appends(); |
| 182 | exit(0); |
| 183 | case 'r': |
| 184 | if (appendx >= appendnum) |
| 185 | appends = xrealloc(appends, |
| 186 | sizeof(struct s_appends) * |
| 187 | (appendnum *= 2)); |
| 188 | appends[appendx].type = AP_FILE; |
| 189 | appends[appendx].s = cp->t; |
| 190 | appendx++; |
| 191 | break; |
| 192 | case 's': |
| 193 | sdone = substitute(cp); |
| 194 | break; |
| 195 | case 't': |
| 196 | if (sdone) { |
| 197 | sdone = 0; |
| 198 | cp = cp->u.c; |
| 199 | goto redirect; |
| 200 | } |
| 201 | break; |
| 202 | case 'w': |
| 203 | if (pd) |
| 204 | break; |
| 205 | if (cp->u.fd == -1 && (cp->u.fd = open(cp->t, |
| 206 | O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, |
| 207 | DEFFILEMODE)) == -1) |
| 208 | err(FATAL, "%s: %s\n", |
| 209 | cp->t, strerror(errno)); |
| 210 | iov[0].iov_base = ps; |
| 211 | iov[0].iov_len = psl; |
| 212 | if (writev(cp->u.fd, iov, 2) != psl + 1) |
| 213 | err(FATAL, "%s: %s\n", |
| 214 | cp->t, strerror(errno)); |
| 215 | break; |
| 216 | case 'x': |
| 217 | tspace = PS; |
| 218 | PS = HS; |
| 219 | HS = tspace; |
| 220 | break; |
| 221 | case 'y': |
| 222 | if (pd) |
| 223 | break; |
| 224 | for (p = ps, len = psl; len--; ++p) |
| 225 | *p = cp->u.y[*p]; |
| 226 | break; |
| 227 | case ':': |
| 228 | case '}': |
| 229 | break; |
| 230 | case '=': |
| 231 | (void)printf("%lu\n", linenum); |
| 232 | } |
| 233 | cp = cp->next; |
| 234 | } /* for all cp */ |
| 235 | |
| 236 | new: if (!nflag && !pd) |
| 237 | (void)printf("%s\n", ps); |
| 238 | flush_appends(); |
| 239 | } /* for all lines */ |
| 240 | } |
| 241 | |
| 242 | /* |
| 243 | * Return TRUE if the command applies to the current line. Sets the inrange |
| 244 | * flag to process ranges. Interprets the non-select (``!'') flag. |
| 245 | */ |
| 246 | static inline int |
| 247 | applies(cp) |
| 248 | struct s_command *cp; |
| 249 | { |
| 250 | int r; |
| 251 | |
| 252 | lastaddr = 0; |
| 253 | if (cp->a1 == NULL && cp->a2 == NULL) |
| 254 | r = 1; |
| 255 | else if (cp->a2) |
| 256 | if (cp->inrange) { |
| 257 | if (match(cp->a2)) { |
| 258 | cp->inrange = 0; |
| 259 | lastaddr = 1; |
| 260 | } |
| 261 | r = 1; |
| 262 | } else if (match(cp->a1)) { |
| 263 | /* |
| 264 | * If the second address is a number less than or |
| 265 | * equal to the line number first selected, only |
| 266 | * one line shall be selected. |
| 267 | * -- POSIX 1003.2 |
| 268 | */ |
| 269 | if (cp->a2->type == AT_LINE && |
| 270 | linenum >= cp->a2->u.l) |
| 271 | lastaddr = 1; |
| 272 | else |
| 273 | cp->inrange = 1; |
| 274 | r = 1; |
| 275 | } else |
| 276 | r = 0; |
| 277 | else |
| 278 | r = match(cp->a1); |
| 279 | return (cp->nonsel ? ! r : r); |
| 280 | } |
| 281 | |
| 282 | /* |
| 283 | * Return TRUE if the address passed matches the current program |
| 284 | * state (linenumber, ps, lastline) |
| 285 | */ |
| 286 | static int inline |
| 287 | match(a) |
| 288 | struct s_addr *a; |
| 289 | { |
| 290 | int eval; |
| 291 | |
| 292 | switch (a->type) { |
| 293 | case AT_RE: |
| 294 | switch (eval = regexec(a->u.r, ps, 0, NULL, 0)) { |
| 295 | case 0: |
| 296 | return (1); |
| 297 | case REG_NOMATCH: |
| 298 | return (0); |
| 299 | default: |
| 300 | err(FATAL, "RE error: %s", strregerror(eval, a->u.r)); |
| 301 | } |
| 302 | case AT_LINE: |
| 303 | return (linenum == a->u.l); |
| 304 | case AT_LAST: |
| 305 | return (lastline); |
| 306 | } |
| 307 | /* NOTREACHED */ |
| 308 | } |
| 309 | |
| 310 | /* |
| 311 | * substitute -- |
| 312 | * Do substitutions in the pattern space. Currently, we build a |
| 313 | * copy of the new pattern space in the substitute space structure |
| 314 | * and then swap them. |
| 315 | */ |
| 316 | static int |
| 317 | substitute(cp) |
| 318 | struct s_command *cp; |
| 319 | { |
| 320 | SPACE tspace; |
| 321 | static regex_t *re; |
| 322 | int n, re_off; |
| 323 | char *endp, *s; |
| 324 | |
| 325 | s = ps; |
| 326 | re = &cp->u.s->re; |
| 327 | if (regexec_check(re, |
| 328 | s, re->re_nsub + 1, cp->u.s->pmatch, 0) == REG_NOMATCH) |
| 329 | return (0); |
| 330 | |
| 331 | SS.len = 0; /* Clean substitute space. */ |
| 332 | n = cp->u.s->n; |
| 333 | switch (n) { |
| 334 | case 0: /* Global */ |
| 335 | do { |
| 336 | /* Locate start of replaced string. */ |
| 337 | re_off = cp->u.s->pmatch[0].rm_so; |
| 338 | /* Locate end of replaced string + 1. */ |
| 339 | endp = s + cp->u.s->pmatch[0].rm_eo; |
| 340 | /* Copy leading retained string. */ |
| 341 | cspace(&SS, s, re_off, 0); |
| 342 | /* Add in regular expression. */ |
| 343 | regsub(cp->u.s->pmatch, s, cp->u.s->new, &SS); |
| 344 | /* Move past this match. */ |
| 345 | s += cp->u.s->pmatch[0].rm_eo; |
| 346 | } while(regexec_check(re, s, re->re_nsub + 1, |
| 347 | cp->u.s->pmatch, REG_NOTBOL) != REG_NOMATCH); |
| 348 | /* Copy trailing retained string. */ |
| 349 | cspace(&SS, s, strlen(s), 0); |
| 350 | break; |
| 351 | default: /* Nth occurrence */ |
| 352 | while (--n) { |
| 353 | s += cp->u.s->pmatch[0].rm_eo; |
| 354 | if (regexec_check(re, s, re->re_nsub + 1, |
| 355 | cp->u.s->pmatch, REG_NOTBOL) == REG_NOMATCH) |
| 356 | return (0); |
| 357 | } |
| 358 | /* FALLTHROUGH */ |
| 359 | case 1: /* 1st occurrence */ |
| 360 | /* Locate start of replaced string. */ |
| 361 | re_off = cp->u.s->pmatch[0].rm_so + s - ps; |
| 362 | /* Copy leading retained string. */ |
| 363 | cspace(&SS, ps, re_off, 0); |
| 364 | /* Add in regular expression. */ |
| 365 | regsub(cp->u.s->pmatch, s, cp->u.s->new, &SS); |
| 366 | /* Copy trailing retained string. */ |
| 367 | s += cp->u.s->pmatch[0].rm_eo; |
| 368 | cspace(&SS, s, strlen(s), 0); |
| 369 | break; |
| 370 | } |
| 371 | |
| 372 | /* |
| 373 | * Swap the substitute space and the pattern space, and make sure |
| 374 | * that any leftover pointers into stdio memory get lost. |
| 375 | */ |
| 376 | tspace = PS; |
| 377 | PS = SS; |
| 378 | SS = tspace; |
| 379 | SS.space = SS.back; |
| 380 | |
| 381 | /* Handle the 'p' flag. */ |
| 382 | if (cp->u.s->p) |
| 383 | (void)printf("%s\n", ps); |
| 384 | |
| 385 | /* Handle the 'w' flag. */ |
| 386 | if (cp->u.s->wfile && !pd) { |
| 387 | if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile, |
| 388 | O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) |
| 389 | err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno)); |
| 390 | iov[0].iov_base = ps; |
| 391 | iov[0].iov_len = psl; |
| 392 | if (writev(cp->u.s->wfd, iov, 2) != psl + 1) |
| 393 | err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno)); |
| 394 | } |
| 395 | return (1); |
| 396 | } |
| 397 | |
| 398 | /* |
| 399 | * Flush append requests. Always called before reading a line, |
| 400 | * therefore it also resets the substitution done (sdone) flag. |
| 401 | */ |
| 402 | static void |
| 403 | flush_appends() |
| 404 | { |
| 405 | FILE *f; |
| 406 | int count, i; |
| 407 | char buf[8 * 1024]; |
| 408 | |
| 409 | for (i = 0; i < appendx; i++) |
| 410 | switch (appends[i].type) { |
| 411 | case AP_STRING: |
| 412 | (void)printf("%s", appends[i].s); |
| 413 | break; |
| 414 | case AP_FILE: |
| 415 | /* |
| 416 | * Read files probably shouldn't be cached. Since |
| 417 | * it's not an error to read a non-existent file, |
| 418 | * it's possible that another program is interacting |
| 419 | * with the sed script through the file system. It |
| 420 | * would be truly bizarre, but possible. It's probably |
| 421 | * not that big a performance win, anyhow. |
| 422 | */ |
| 423 | if ((f = fopen(appends[i].s, "r")) == NULL) |
| 424 | break; |
| 425 | while (count = fread(buf, 1, sizeof(buf), f)) |
| 426 | (void)fwrite(buf, 1, count, stdout); |
| 427 | (void)fclose(f); |
| 428 | break; |
| 429 | } |
| 430 | if (ferror(stdout)) |
| 431 | err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); |
| 432 | appendx = 0; |
| 433 | sdone = 0; |
| 434 | } |
| 435 | |
| 436 | static void |
| 437 | lputs(s) |
| 438 | register char *s; |
| 439 | { |
| 440 | register int count; |
| 441 | register char *escapes, *p; |
| 442 | struct winsize win; |
| 443 | static int termwidth = -1; |
| 444 | |
| 445 | if (termwidth == -1) |
| 446 | if (p = getenv("COLUMNS")) |
| 447 | termwidth = atoi(p); |
| 448 | else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 && |
| 449 | win.ws_col > 0) |
| 450 | termwidth = win.ws_col; |
| 451 | else |
| 452 | termwidth = 60; |
| 453 | |
| 454 | for (count = 0; *s; ++s) { |
| 455 | if (count >= termwidth) { |
| 456 | (void)printf("\\\n"); |
| 457 | count = 0; |
| 458 | } |
| 459 | if (isascii(*s) && isprint(*s) && *s != '\\') { |
| 460 | (void)putchar(*s); |
| 461 | count++; |
| 462 | } else { |
| 463 | escapes = "\\\a\b\f\n\r\t\v"; |
| 464 | (void)putchar('\\'); |
| 465 | if (p = strchr(escapes, *s)) { |
| 466 | (void)putchar("\\abfnrtv"[p - escapes]); |
| 467 | count += 2; |
| 468 | } else { |
| 469 | (void)printf("%03o", (u_char)*s); |
| 470 | count += 4; |
| 471 | } |
| 472 | } |
| 473 | } |
| 474 | (void)putchar('$'); |
| 475 | (void)putchar('\n'); |
| 476 | if (ferror(stdout)) |
| 477 | err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); |
| 478 | } |
| 479 | |
| 480 | /* |
| 481 | * Regexec with checking for errors |
| 482 | */ |
| 483 | static int |
| 484 | regexec_check(preg, string, nmatch, pmatch, eflags) |
| 485 | regex_t *preg; |
| 486 | const char *string; |
| 487 | int nmatch; |
| 488 | regmatch_t pmatch[]; |
| 489 | int eflags; |
| 490 | { |
| 491 | int eval; |
| 492 | |
| 493 | switch (eval = regexec(preg, string, nmatch, pmatch, eflags)) { |
| 494 | case 0: |
| 495 | return (0); |
| 496 | case REG_NOMATCH: |
| 497 | return (REG_NOMATCH); |
| 498 | default: |
| 499 | err(FATAL, "RE error: %s", strregerror(eval, preg)); |
| 500 | } |
| 501 | /* NOTREACHED */ |
| 502 | } |
| 503 | |
| 504 | /* |
| 505 | * regsub - perform substitutions after a regexp match |
| 506 | * Based on a routine by Henry Spencer |
| 507 | */ |
| 508 | static void |
| 509 | regsub(pmatch, string, src, sp) |
| 510 | regmatch_t *pmatch; |
| 511 | char *string, *src; |
| 512 | SPACE *sp; |
| 513 | { |
| 514 | register int len, no; |
| 515 | register char c, *dst; |
| 516 | |
| 517 | #define NEEDSP(reqlen) \ |
| 518 | if (sp->len >= sp->blen - (reqlen) - 1) { \ |
| 519 | sp->blen += (reqlen) + 1024; \ |
| 520 | sp->space = sp->back = xrealloc(sp->back, sp->blen); \ |
| 521 | dst = sp->space + sp->len; \ |
| 522 | } |
| 523 | |
| 524 | dst = sp->space + sp->len; |
| 525 | while ((c = *src++) != '\0') { |
| 526 | if (c == '&') |
| 527 | no = 0; |
| 528 | else if (c == '\\' && isdigit(*src)) |
| 529 | no = *src++ - '0'; |
| 530 | else |
| 531 | no = -1; |
| 532 | if (no < 0) { /* Ordinary character. */ |
| 533 | if (c == '\\' && (*src == '\\' || *src == '&')) |
| 534 | c = *src++; |
| 535 | NEEDSP(1); |
| 536 | *dst++ = c; |
| 537 | ++sp->len; |
| 538 | } else if (pmatch[no].rm_so != -1 && pmatch[no].rm_eo != -1) { |
| 539 | len = pmatch[no].rm_eo - pmatch[no].rm_so; |
| 540 | NEEDSP(len); |
| 541 | memmove(dst, string + pmatch[no].rm_so, len); |
| 542 | dst += len; |
| 543 | sp->len += len; |
| 544 | } |
| 545 | } |
| 546 | NEEDSP(1); |
| 547 | *dst = '\0'; |
| 548 | } |
| 549 | |
| 550 | /* |
| 551 | * aspace -- |
| 552 | * Append the source space to the destination space, allocating new |
| 553 | * space as necessary. |
| 554 | */ |
| 555 | static void |
| 556 | cspace(sp, p, len, append) |
| 557 | SPACE *sp; |
| 558 | char *p; |
| 559 | size_t len; |
| 560 | int append; |
| 561 | { |
| 562 | size_t tlen; |
| 563 | int needcopy; |
| 564 | |
| 565 | /* Current pointer may point to something else at the moment. */ |
| 566 | needcopy = sp->space != sp->back; |
| 567 | |
| 568 | /* |
| 569 | * Make sure SPACE has enough memory and ramp up quickly. |
| 570 | * Add in two extra bytes, one for the newline, one for a |
| 571 | * terminating NULL. |
| 572 | */ |
| 573 | tlen = sp->len + len + 2; |
| 574 | if (tlen > sp->blen) { |
| 575 | sp->blen = tlen + 1024; |
| 576 | sp->back = xrealloc(sp->back, sp->blen); |
| 577 | } |
| 578 | |
| 579 | if (needcopy) |
| 580 | memmove(sp->back, sp->space, sp->len + 1); |
| 581 | sp->space = sp->back; |
| 582 | |
| 583 | /* May just be copying out of a stdio buffer. */ |
| 584 | if (len == NULL) |
| 585 | return; |
| 586 | |
| 587 | /* Append a separating newline. */ |
| 588 | if (append) |
| 589 | sp->space[sp->len++] = '\n'; |
| 590 | |
| 591 | /* Append the new stuff, plus its terminating NULL. */ |
| 592 | memmove(sp->space + sp->len, p, len + 1); |
| 593 | sp->len += len; |
| 594 | } |
| 595 | |
| 596 | /* |
| 597 | * Close all cached opened files and report any errors |
| 598 | */ |
| 599 | void |
| 600 | cfclose(cp) |
| 601 | register struct s_command *cp; |
| 602 | { |
| 603 | |
| 604 | for (; cp != NULL; cp = cp->next) |
| 605 | switch(cp->code) { |
| 606 | case 's': |
| 607 | if (cp->u.s->wfd != -1 && close(cp->u.s->wfd)) |
| 608 | err(FATAL, |
| 609 | "%s: %s", cp->u.s->wfile, strerror(errno)); |
| 610 | break; |
| 611 | case 'w': |
| 612 | if (cp->u.fd != -1 && close(cp->u.fd)) |
| 613 | err(FATAL, "%s: %s", cp->t, strerror(errno)); |
| 614 | break; |
| 615 | case '{': |
| 616 | cfclose(cp->u.c); |
| 617 | break; |
| 618 | } |
| 619 | } |