discuss empty RE's
[unix-history] / usr / src / usr.bin / sed / process.c
CommitLineData
2b932e6d
KB
1/*-
2 * Copyright (c) 1992 Diomidis Spinellis.
3 * Copyright (c) 1992 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Diomidis Spinellis of Imperial College, University of London.
8 *
9 * %sccs.include.redist.c%
10 */
11
/* SCCS version identification string; omitted when running under lint. */
#ifndef lint
static char sccsid[] = "@(#)process.c 5.2 (Berkeley) %G%";
#endif /* not lint */
15
16#include <sys/types.h>
17#include <sys/stat.h>
18#include <sys/ioctl.h>
19#include <sys/uio.h>
20
21#include <ctype.h>
22#include <errno.h>
23#include <fcntl.h>
24#include <limits.h>
25#include <regex.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29#include <unistd.h>
30
31#include "defs.h"
32#include "extern.h"
33
/*
 * A SPACE describes one of sed's text buffers.  `space' is the pointer
 * currently used to read the text; `back' is the memory owned by the
 * structure (grown with xrealloc).  The two differ while `space' refers
 * to text stored somewhere else.
 */
typedef struct {
	char	*space;		/* Current space pointer. */
	size_t	 len;		/* Current length. */
	int	 deleted;	/* If deleted. */
	char	*back;		/* Backing memory. */
	size_t	 blen;		/* Backing memory length. */
} SPACE;

/* Hold space, pattern space and the scratch space used by substitute(). */
static SPACE HS, PS, SS;

/* Shorthand for the pattern space fields. */
#define	pd		PS.deleted
#define	ps		PS.space
#define	psl		PS.len

/* Shorthand for the hold space fields. */
#define	hs		HS.space
#define	hsl		HS.len
47
48static inline int applies __P((struct s_command *));
49static void cspace __P((SPACE *, char *, size_t, int));
50static void flush_appends __P((void));
51static void lputs __P((char *));
52static inline int match __P((struct s_addr *));
53static int regexec_check __P((regex_t *, const char *,
54 int, regmatch_t[], int));
55static void regsub __P((regmatch_t *, char *, char *, SPACE *));
56static int substitute __P((struct s_command *));
57
/* Queue of pending 'a' and 'r' output, emitted by flush_appends(). */
struct s_appends *appends;	/* Array of pointers to strings to append. */
static int appendx;		/* Index into appends array. */
int appendnum;			/* Size of appends array. */

static int lastaddr;		/* Set by applies if last address of a range. */
static int sdone;		/* If any substitutes since last line input. */

/*
 * Iov structure for 'w' commands: slot 0 is pointed at the pattern space
 * just before each write, slot 1 is the fixed trailing newline.
 */
static struct iovec iov[2] = {
	{ NULL, 0 },
	{ "\n", 1 }
};
66
67void
68process()
69{
70 struct s_command *cp;
71 SPACE tspace;
72 size_t len;
73 char oldc, *p;
74
75 for (linenum = 0; ps = mf_fgets(&psl);) {
76 pd = 0;
77 cp = prog;
78redirect:
79 while (cp != NULL) {
80 if (!applies(cp)) {
81 cp = cp->next;
82 continue;
83 }
84 switch (cp->code) {
85 case '{':
86 cp = cp->u.c;
87 goto redirect;
88 case 'a':
89 if (appendx >= appendnum)
90 appends = xrealloc(appends,
91 sizeof(struct s_appends) *
92 (appendnum *= 2));
93 appends[appendx].type = AP_STRING;
94 appends[appendx].s = cp->t;
95 appendx++;
96 break;
97 case 'b':
98 cp = cp->u.c;
99 goto redirect;
100 case 'c':
101 pd = 1;
102 psl = 0;
103 if (cp->a2 == NULL || lastaddr)
104 (void)printf("%s", cp->t);
105 break;
106 case 'd':
107 if (pd)
108 goto new;
bbb5d6cb 109 pd = 1;
2b932e6d
KB
110 goto new;
111 case 'D':
112 if (pd)
113 goto new;
bbb5d6cb
KB
114 if ((p = strchr(ps, '\n')) == NULL)
115 pd = 1;
116 else {
2b932e6d
KB
117 psl -= (p - ps) - 1;
118 memmove(ps, p + 1, psl);
119 }
120 goto new;
121 case 'g':
122 ps = hs;
123 psl = hsl;
124 break;
125 case 'G':
126 cspace(&PS, hs, hsl, 1);
127 break;
128 case 'h':
129 cspace(&HS, ps, psl, 0);
130 break;
131 case 'H':
132 cspace(&HS, ps, psl, 1);
133 break;
134 case 'i':
135 (void)printf("%s", cp->t);
136 break;
137 case 'l':
138 lputs(ps);
139 break;
140 case 'n':
141 if (!nflag && !pd)
142 (void)printf("%s\n", ps);
143 flush_appends();
144 ps = mf_fgets(&psl);
145#ifdef HISTORIC_PRACTICE
146 if (ps == NULL)
147 exit(0);
148#endif
149 pd = 0;
150 break;
151 case 'N':
152 flush_appends();
153 if (ps != PS.back)
154 cspace(&PS, NULL, 0, 0);
155 if ((p = mf_fgets(&len)) == NULL) {
156 if (!nflag && !pd)
157 (void)printf("%s\n", ps);
158 exit(0);
159 }
160 cspace(&PS, p, len, 1);
161 break;
162 case 'p':
163 if (pd)
164 break;
165 (void)printf("%s\n", ps);
166 break;
167 case 'P':
168 if (pd)
169 break;
170 if ((p = strchr(ps, '\n')) != NULL) {
171 oldc = *p;
172 *p = '\0';
173 }
174 (void)printf("%s\n", ps);
175 if (p != NULL)
176 *p = oldc;
177 break;
178 case 'q':
179 if (!nflag && !pd)
180 (void)printf("%s\n", ps);
181 flush_appends();
182 exit(0);
183 case 'r':
184 if (appendx >= appendnum)
185 appends = xrealloc(appends,
186 sizeof(struct s_appends) *
187 (appendnum *= 2));
188 appends[appendx].type = AP_FILE;
189 appends[appendx].s = cp->t;
190 appendx++;
191 break;
192 case 's':
193 sdone = substitute(cp);
194 break;
195 case 't':
196 if (sdone) {
197 sdone = 0;
198 cp = cp->u.c;
199 goto redirect;
200 }
201 break;
202 case 'w':
203 if (pd)
204 break;
205 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
206 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
207 DEFFILEMODE)) == -1)
208 err(FATAL, "%s: %s\n",
209 cp->t, strerror(errno));
210 iov[0].iov_base = ps;
211 iov[0].iov_len = psl;
212 if (writev(cp->u.fd, iov, 2) != psl + 1)
213 err(FATAL, "%s: %s\n",
214 cp->t, strerror(errno));
215 break;
216 case 'x':
217 tspace = PS;
218 PS = HS;
219 HS = tspace;
220 break;
221 case 'y':
222 if (pd)
223 break;
224 for (p = ps, len = psl; len--; ++p)
225 *p = cp->u.y[*p];
226 break;
227 case ':':
228 case '}':
229 break;
230 case '=':
231 (void)printf("%lu\n", linenum);
232 }
233 cp = cp->next;
234 } /* for all cp */
235
236new: if (!nflag && !pd)
237 (void)printf("%s\n", ps);
238 flush_appends();
239 } /* for all lines */
240}
241
242/*
243 * Return TRUE if the command applies to the current line. Sets the inrange
244 * flag to process ranges. Interprets the non-select (``!'') flag.
245 */
246static inline int
247applies(cp)
248 struct s_command *cp;
249{
250 int r;
251
252 lastaddr = 0;
253 if (cp->a1 == NULL && cp->a2 == NULL)
254 r = 1;
255 else if (cp->a2)
256 if (cp->inrange) {
257 if (match(cp->a2)) {
258 cp->inrange = 0;
259 lastaddr = 1;
260 }
261 r = 1;
262 } else if (match(cp->a1)) {
263 /*
264 * If the second address is a number less than or
265 * equal to the line number first selected, only
266 * one line shall be selected.
267 * -- POSIX 1003.2
268 */
269 if (cp->a2->type == AT_LINE &&
270 linenum >= cp->a2->u.l)
271 lastaddr = 1;
272 else
273 cp->inrange = 1;
274 r = 1;
275 } else
276 r = 0;
277 else
278 r = match(cp->a1);
279 return (cp->nonsel ? ! r : r);
280}
281
282/*
283 * Return TRUE if the address passed matches the current program
284 * state (linenumber, ps, lastline)
285 */
286static int inline
287match(a)
288 struct s_addr *a;
289{
290 int eval;
291
292 switch (a->type) {
293 case AT_RE:
294 switch (eval = regexec(a->u.r, ps, 0, NULL, 0)) {
295 case 0:
296 return (1);
297 case REG_NOMATCH:
298 return (0);
299 default:
300 err(FATAL, "RE error: %s", strregerror(eval, a->u.r));
301 }
302 case AT_LINE:
303 return (linenum == a->u.l);
304 case AT_LAST:
305 return (lastline);
306 }
307 /* NOTREACHED */
308}
309
310/*
311 * substitute --
312 * Do substitutions in the pattern space. Currently, we build a
313 * copy of the new pattern space in the substitute space structure
314 * and then swap them.
315 */
316static int
317substitute(cp)
318 struct s_command *cp;
319{
320 SPACE tspace;
321 static regex_t *re;
322 int n, re_off;
323 char *endp, *s;
324
325 s = ps;
326 re = &cp->u.s->re;
327 if (regexec_check(re,
328 s, re->re_nsub + 1, cp->u.s->pmatch, 0) == REG_NOMATCH)
329 return (0);
330
331 SS.len = 0; /* Clean substitute space. */
332 n = cp->u.s->n;
333 switch (n) {
334 case 0: /* Global */
335 do {
336 /* Locate start of replaced string. */
337 re_off = cp->u.s->pmatch[0].rm_so;
338 /* Locate end of replaced string + 1. */
339 endp = s + cp->u.s->pmatch[0].rm_eo;
340 /* Copy leading retained string. */
341 cspace(&SS, s, re_off, 0);
342 /* Add in regular expression. */
343 regsub(cp->u.s->pmatch, s, cp->u.s->new, &SS);
344 /* Move past this match. */
345 s += cp->u.s->pmatch[0].rm_eo;
346 } while(regexec_check(re, s, re->re_nsub + 1,
347 cp->u.s->pmatch, REG_NOTBOL) != REG_NOMATCH);
348 /* Copy trailing retained string. */
349 cspace(&SS, s, strlen(s), 0);
350 break;
351 default: /* Nth occurrence */
352 while (--n) {
353 s += cp->u.s->pmatch[0].rm_eo;
354 if (regexec_check(re, s, re->re_nsub + 1,
355 cp->u.s->pmatch, REG_NOTBOL) == REG_NOMATCH)
356 return (0);
357 }
358 /* FALLTHROUGH */
359 case 1: /* 1st occurrence */
360 /* Locate start of replaced string. */
361 re_off = cp->u.s->pmatch[0].rm_so + s - ps;
362 /* Copy leading retained string. */
363 cspace(&SS, ps, re_off, 0);
364 /* Add in regular expression. */
365 regsub(cp->u.s->pmatch, s, cp->u.s->new, &SS);
366 /* Copy trailing retained string. */
367 s += cp->u.s->pmatch[0].rm_eo;
368 cspace(&SS, s, strlen(s), 0);
369 break;
370 }
371
372 /*
373 * Swap the substitute space and the pattern space, and make sure
374 * that any leftover pointers into stdio memory get lost.
375 */
376 tspace = PS;
377 PS = SS;
378 SS = tspace;
379 SS.space = SS.back;
380
381 /* Handle the 'p' flag. */
382 if (cp->u.s->p)
383 (void)printf("%s\n", ps);
384
385 /* Handle the 'w' flag. */
386 if (cp->u.s->wfile && !pd) {
387 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
388 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
389 err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
390 iov[0].iov_base = ps;
391 iov[0].iov_len = psl;
392 if (writev(cp->u.s->wfd, iov, 2) != psl + 1)
393 err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
394 }
395 return (1);
396}
397
398/*
399 * Flush append requests. Always called before reading a line,
400 * therefore it also resets the substitution done (sdone) flag.
401 */
402static void
403flush_appends()
404{
405 FILE *f;
406 int count, i;
407 char buf[8 * 1024];
408
409 for (i = 0; i < appendx; i++)
410 switch (appends[i].type) {
411 case AP_STRING:
412 (void)printf("%s", appends[i].s);
413 break;
414 case AP_FILE:
415 /*
416 * Read files probably shouldn't be cached. Since
417 * it's not an error to read a non-existent file,
418 * it's possible that another program is interacting
419 * with the sed script through the file system. It
420 * would be truly bizarre, but possible. It's probably
421 * not that big a performance win, anyhow.
422 */
423 if ((f = fopen(appends[i].s, "r")) == NULL)
424 break;
425 while (count = fread(buf, 1, sizeof(buf), f))
426 (void)fwrite(buf, 1, count, stdout);
427 (void)fclose(f);
428 break;
429 }
430 if (ferror(stdout))
431 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
432 appendx = 0;
433 sdone = 0;
434}
435
436static void
437lputs(s)
438 register char *s;
439{
440 register int count;
441 register char *escapes, *p;
442 struct winsize win;
443 static int termwidth = -1;
444
445 if (termwidth == -1)
446 if (p = getenv("COLUMNS"))
447 termwidth = atoi(p);
448 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
449 win.ws_col > 0)
450 termwidth = win.ws_col;
451 else
452 termwidth = 60;
453
454 for (count = 0; *s; ++s) {
455 if (count >= termwidth) {
456 (void)printf("\\\n");
457 count = 0;
458 }
459 if (isascii(*s) && isprint(*s) && *s != '\\') {
460 (void)putchar(*s);
461 count++;
462 } else {
463 escapes = "\\\a\b\f\n\r\t\v";
464 (void)putchar('\\');
465 if (p = strchr(escapes, *s)) {
466 (void)putchar("\\abfnrtv"[p - escapes]);
467 count += 2;
468 } else {
469 (void)printf("%03o", (u_char)*s);
470 count += 4;
471 }
472 }
473 }
474 (void)putchar('$');
475 (void)putchar('\n');
476 if (ferror(stdout))
477 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
478}
479
480/*
481 * Regexec with checking for errors
482 */
483static int
484regexec_check(preg, string, nmatch, pmatch, eflags)
485 regex_t *preg;
486 const char *string;
487 int nmatch;
488 regmatch_t pmatch[];
489 int eflags;
490{
491 int eval;
492
493 switch (eval = regexec(preg, string, nmatch, pmatch, eflags)) {
494 case 0:
495 return (0);
496 case REG_NOMATCH:
497 return (REG_NOMATCH);
498 default:
499 err(FATAL, "RE error: %s", strregerror(eval, preg));
500 }
501 /* NOTREACHED */
502}
503
504/*
505 * regsub - perform substitutions after a regexp match
506 * Based on a routine by Henry Spencer
507 */
508static void
509regsub(pmatch, string, src, sp)
510 regmatch_t *pmatch;
511 char *string, *src;
512 SPACE *sp;
513{
514 register int len, no;
515 register char c, *dst;
516
517#define NEEDSP(reqlen) \
518 if (sp->len >= sp->blen - (reqlen) - 1) { \
519 sp->blen += (reqlen) + 1024; \
520 sp->space = sp->back = xrealloc(sp->back, sp->blen); \
521 dst = sp->space + sp->len; \
522 }
523
524 dst = sp->space + sp->len;
525 while ((c = *src++) != '\0') {
526 if (c == '&')
527 no = 0;
528 else if (c == '\\' && isdigit(*src))
529 no = *src++ - '0';
530 else
531 no = -1;
532 if (no < 0) { /* Ordinary character. */
533 if (c == '\\' && (*src == '\\' || *src == '&'))
534 c = *src++;
535 NEEDSP(1);
536 *dst++ = c;
537 ++sp->len;
538 } else if (pmatch[no].rm_so != -1 && pmatch[no].rm_eo != -1) {
539 len = pmatch[no].rm_eo - pmatch[no].rm_so;
540 NEEDSP(len);
541 memmove(dst, string + pmatch[no].rm_so, len);
542 dst += len;
543 sp->len += len;
544 }
545 }
546 NEEDSP(1);
547 *dst = '\0';
548}
549
550/*
551 * aspace --
552 * Append the source space to the destination space, allocating new
553 * space as necessary.
554 */
555static void
556cspace(sp, p, len, append)
557 SPACE *sp;
558 char *p;
559 size_t len;
560 int append;
561{
562 size_t tlen;
563 int needcopy;
564
565 /* Current pointer may point to something else at the moment. */
566 needcopy = sp->space != sp->back;
567
568 /*
569 * Make sure SPACE has enough memory and ramp up quickly.
570 * Add in two extra bytes, one for the newline, one for a
571 * terminating NULL.
572 */
573 tlen = sp->len + len + 2;
574 if (tlen > sp->blen) {
575 sp->blen = tlen + 1024;
576 sp->back = xrealloc(sp->back, sp->blen);
577 }
578
579 if (needcopy)
580 memmove(sp->back, sp->space, sp->len + 1);
581 sp->space = sp->back;
582
583 /* May just be copying out of a stdio buffer. */
584 if (len == NULL)
585 return;
586
587 /* Append a separating newline. */
588 if (append)
589 sp->space[sp->len++] = '\n';
590
591 /* Append the new stuff, plus its terminating NULL. */
592 memmove(sp->space + sp->len, p, len + 1);
593 sp->len += len;
594}
595
596/*
597 * Close all cached opened files and report any errors
598 */
599void
600cfclose(cp)
601 register struct s_command *cp;
602{
603
604 for (; cp != NULL; cp = cp->next)
605 switch(cp->code) {
606 case 's':
607 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
608 err(FATAL,
609 "%s: %s", cp->u.s->wfile, strerror(errno));
610 break;
611 case 'w':
612 if (cp->u.fd != -1 && close(cp->u.fd))
613 err(FATAL, "%s: %s", cp->t, strerror(errno));
614 break;
615 case '{':
616 cfclose(cp->u.c);
617 break;
618 }
619}