Oh GACK! src-clean doesn't quite work that easily since cleandist rebuilds the
[unix-history] / usr.bin / sed / sed.c
CommitLineData
15637ed4
RG
1/* GNU SED, a batch stream editor.
2 Copyright (C) 1989-1991 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
17
18#ifdef __STDC__
19#define VOID void
20#else
21#define VOID char
22#endif
23
24#define _GNU_SOURCE
25#include <ctype.h>
26#ifndef isblank
27#define isblank(c) ((c) == ' ' || (c) == '\t')
28#endif
29#include <stdio.h>
30#include <regex.h>
31#include <getopt.h>
32#if defined(STDC_HEADERS)
33#include <stdlib.h>
34#endif
35#if defined(USG) || defined(STDC_HEADERS)
36#include <string.h>
37#include <memory.h>
38#define bcopy(s, d, n) (memcpy((d), (s), (n)))
39#else
40#include <strings.h>
41VOID *memchr();
42#endif
43
/* Banner written to stderr by the -V / --version option. */
char *version_string = "GNU sed version 1.08";
45
/* A vector describes one chunk of a compiled sed program.  The main
   program has one vector, and each '{' '}' pair gets one of its own. */
struct vector {
	struct sed_cmd *v;		/* array of compiled commands */
	int v_length;			/* number of entries of v in use */
	int v_allocated;		/* number of entries v has room for */
	struct vector *up_one;		/* set to 0 when the vector is created */
	struct vector *next_one;	/* set to 0 when the vector is created */
};
55
56
/* A sed_label records either a jump ('b' / 't') or a label (':').
   Two separate lists are kept: 'jumps' for the jumps and 'labels' for
   the labels.

   v        the program chunk in which the jump/label was encountered
   v_index  which element of that chunk IS the jump/label; the first
            element of a chunk is number zero
   name     the null-terminated label name
   next     the next jump/label on the same list */

struct sed_label {
	struct vector *v;
	int v_index;
	char *name;
	struct sed_label *next;
};
73
/* Kinds of address, stored in addr.addr_type.  Any other value is
   undefined. */

#define ADDR_NULL	0	/* no address given */
#define ADDR_NUM	1	/* addr_number is valid */
#define ADDR_REGEX	2	/* addr_regex is valid */
#define ADDR_LAST	3	/* the address is '$' */

/* One address of a sed command. */
struct addr {
	int addr_type;				/* one of the ADDR_ values */
	struct re_pattern_buffer *addr_regex;	/* used when ADDR_REGEX */
	int addr_number;			/* used when ADDR_NUM */
};
92
93
94/* Aflags: If the low order bit is set, a1 has been
95 matched; apply this command until a2 matches.
96 If the next bit is set, apply this command to all
97 lines that DON'T match the address(es).
98 */
99
100#define A1_MATCHED_BIT 01
101#define ADDR_BANG_BIT 02
102
103
104struct sed_cmd {
105 struct addr a1,a2;
106 int aflags;
107
108 char cmd;
109
110 union {
111 /* This structure is used for a, i, and c commands */
112 struct {
113 char *text;
114 int text_len;
115 } cmd_txt;
116
117 /* This is used for b and t commands */
118 struct sed_cmd *label;
119
120 /* This for r and w commands */
121 FILE *io_file;
122
123 /* This for the hairy s command */
124 /* For the flags var:
125 low order bit means the 'g' option was given,
126 next bit means the 'p' option was given,
127 and the next bit means a 'w' option was given,
128 and wio_file contains the file to write to. */
129
130#define S_GLOBAL_BIT 01
131#define S_PRINT_BIT 02
132#define S_WRITE_BIT 04
133#define S_NUM_BIT 010
134
135 struct {
136 struct re_pattern_buffer *regx;
137 char *replacement;
138 int replace_length;
139 int flags;
140 int numb;
141 FILE *wio_file;
142 } cmd_regex;
143
144 /* This for the y command */
145 unsigned char *translate;
146
147 /* For { and } */
148 struct vector *sub;
149 struct sed_label *jump;
150 } x;
151};
152
/* Sed works on one line at a time; this is the buffer that holds it. */
struct line {
	char *text;	/* the line itself, allocated by malloc */
	int length;	/* number of bytes of text in use */
	int alloc;	/* number of bytes allocated for text */
};
159
/* Table of the files opened by the 'r', 'w', and 's///w' commands.
   Each entry holds the FILE pointer to use, the file's name, and a flag
   that is non-zero if the file is being read instead of written. */

#define NUM_FPS	32

struct {
	FILE *phile;	/* the open stream */
	char *name;	/* name the file was opened under */
	int readit;	/* non-zero: opened for reading */
} file_ptrs[NUM_FPS];
171
172
173#if defined(__STDC__)
174# define P_(s) s
175#else
176# define P_(s) ()
177#endif
178
179void panic P_((char *str, ...));
180char *__fp_name P_((FILE *fp));
181FILE *ck_fopen P_((char *name, char *mode));
182void ck_fwrite P_((char *ptr, int size, int nmemb, FILE *stream));
183void ck_fclose P_((FILE *stream));
184VOID *ck_malloc P_((int size));
185VOID *ck_realloc P_((VOID *ptr, int size));
186char *ck_strdup P_((char *str));
187VOID *init_buffer P_((void));
188void flush_buffer P_((VOID *bb));
189int size_buffer P_((VOID *b));
190void add_buffer P_((VOID *bb, char *p, int n));
191void add1_buffer P_((VOID *bb, int ch));
192char *get_buffer P_((VOID *bb));
193
194void compile_string P_((char *str));
195void compile_file P_((char *str));
196struct vector *compile_program P_((struct vector *vector));
197void bad_prog P_((char *why));
198int inchar P_((void));
199void savchar P_((int ch));
200int compile_address P_((struct addr *addr));
201void compile_regex P_((int slash));
202struct sed_label *setup_jump P_((struct sed_label *list, struct sed_cmd *cmd, struct vector *vec));
203FILE *compile_filename P_((int readit));
204void read_file P_((char *name));
205void execute_program P_((struct vector *vec));
206int match_address P_((struct addr *addr));
207int read_pattern_space P_((void));
208void append_pattern_space P_((void));
209void line_copy P_((struct line *from, struct line *to));
210void line_append P_((struct line *from, struct line *to));
211void str_append P_((struct line *to, char *string, int length));
212void usage P_((void));
213
214extern char *myname;
215
216/* If set, don't write out the line unless explictly told to */
217int no_default_output = 0;
218
219/* Current input line # */
220int input_line_number = 0;
221
222/* Are we on the last input file? */
223int last_input_file = 0;
224
225/* Have we hit EOF on the last input file? This is used to decide if we
226 have hit the '$' address yet. */
227int input_EOF = 0;
228
229/* non-zero if a quit command has been executed. */
230int quit_cmd = 0;
231
232/* Have we done any replacements lately? This is used by the 't' command. */
233int replaced = 0;
234
235/* How many '{'s are we executing at the moment */
236int program_depth = 0;
237
238/* The complete compiled SED program that we are going to run */
239struct vector *the_program = 0;
240
241/* information about labels and jumps-to-labels. This is used to do
242 the required backpatching after we have compiled all the scripts. */
243struct sed_label *jumps = 0;
244struct sed_label *labels = 0;
245
246/* The 'current' input line. */
247struct line line;
248
249/* An input line that's been stored by later use by the program */
250struct line hold;
251
252/* A 'line' to append to the current line when it comes time to write it out */
253struct line append;
254
255
256/* When we're reading a script command from a string, 'prog_start' and
257 'prog_end' point to the beginning and end of the string. This
258 would allow us to compile script strings that contain nulls, except
259 that script strings are only read from the command line, which is
260 null-terminated */
261char *prog_start;
262char *prog_end;
263
264/* When we're reading a script command from a string, 'prog_cur' points
265 to the current character in the string */
266char *prog_cur;
267
268/* This is the name of the current script file.
269 It is used for error messages. */
270char *prog_name;
271
272/* This is the current script file. If it is zero, we are reading
273 from a string stored in 'prog_start' instead. If both 'prog_file'
274 and 'prog_start' are zero, we're in trouble! */
275FILE *prog_file;
276
277/* this is the number of the current script line that we're compiling. It is
278 used to give out useful and informative error messages. */
279int prog_line = 1;
280
281/* This is the file pointer that we're currently reading data from. It may
282 be stdin */
283FILE *input_file;
284
285/* If this variable is non-zero at exit, one or more of the input
286 files couldn't be opened. */
287
288int bad_input = 0;
289
290/* 'an empty regular expression is equivalent to the last regular
291 expression read' so we have to keep track of the last regex used.
292 Here's where we store a pointer to it (it is only malloc()'d once) */
293struct re_pattern_buffer *last_regex;
294
295/* Various error messages we may want to print */
296static char ONE_ADDR[] = "Command only uses one address";
297static char NO_ADDR[] = "Command doesn't take any addresses";
298static char LINE_JUNK[] = "Extra characters after command";
299static char BAD_EOF[] = "Unexpected End-of-file";
300static char NO_REGEX[] = "No previous regular expression";
301
/* Long option table handed to getopt_long().  Every long option maps
   onto a short option letter; the second field is 1 when the option
   takes an argument. */
static struct option longopts[] =
{
	{"expression",	1, NULL, 'e'},
	{"file",	1, NULL, 'f'},
	{"quiet",	0, NULL, 'n'},
	{"silent",	0, NULL, 'n'},
	{"version",	0, NULL, 'V'},
	{NULL,		0, NULL, 0}
};
311
312/* Yes, the main program, which parses arguments, and does the right
313 thing with them; it also inits the temporary storage, etc. */
314void
315main(argc,argv)
316int argc;
317char **argv;
318{
319 int opt;
320 char *e_strings = NULL;
321 int compiled = 0;
322 struct sed_label *go,*lbl;
323
324 myname=argv[0];
325 while((opt=getopt_long(argc,argv,"ne:f:V", longopts, (int *) 0))
326 !=EOF) {
327 switch(opt) {
328 case 'n':
329 no_default_output = 1;
330 break;
331 case 'e':
332 if(e_strings == NULL) {
333 e_strings=ck_malloc(strlen(optarg)+2);
334 strcpy(e_strings,optarg);
335 } else {
336 e_strings=ck_realloc(e_strings,strlen(e_strings)+strlen(optarg)+2);
337 strcat(e_strings,optarg);
338 }
339 strcat(e_strings,"\n");
340 compiled = 1;
341 break;
342 case 'f':
343 compile_file(optarg);
344 compiled = 1;
345 break;
346 case 'V':
347 fprintf(stderr, "%s\n", version_string);
348 break;
349 default:
350 usage();
351 }
352 }
353 if(e_strings) {
354 compile_string(e_strings);
355 free(e_strings);
356 }
357 if(!compiled) {
358 if (optind == argc)
359 usage();
360 compile_string(argv[optind++]);
361 }
362
363 for(go=jumps;go;go=go->next) {
364 for(lbl=labels;lbl;lbl=lbl->next)
365 if(!strcmp(lbl->name,go->name))
366 break;
367 if(*go->name && !lbl)
368 panic("Can't find label for jump to '%s'",go->name);
369 go->v->v[go->v_index].x.jump=lbl;
370 }
371
372 line.length=0;
373 line.alloc=50;
374 line.text=ck_malloc(50);
375
376 append.length=0;
377 append.alloc=50;
378 append.text=ck_malloc(50);
379
380 hold.length=0;
381 hold.alloc=50;
382 hold.text=ck_malloc(50);
383
384 if(argc<=optind) {
385 last_input_file++;
386 read_file("-");
387 } else while(optind<argc) {
388 if(optind==argc-1)
389 last_input_file++;
390 read_file(argv[optind]);
391 optind++;
392 if(quit_cmd)
393 break;
394 }
395 if(bad_input)
396 exit(2);
397 exit(0);
398}
399
400/* 'str' is a string (from the command line) that contains a sed command.
401 Compile the command, and add it to the end of 'the_program' */
402void
403compile_string(str)
404char *str;
405{
406 prog_file = 0;
407 prog_line=0;
408 prog_start=prog_cur=str;
409 prog_end=str+strlen(str);
410 the_program=compile_program(the_program);
411}
412
413/* 'str' is the name of a file containing sed commands. Read them in
414 and add them to the end of 'the_program' */
415void
416compile_file(str)
417char *str;
418{
419 int ch;
420
421 prog_start=prog_cur=prog_end=0;
422 prog_name=str;
423 prog_line=1;
424 if(str[0]=='-' && str[1]=='\0')
425 prog_file=stdin;
426 else
427 prog_file=ck_fopen(str,"r");
428 ch=getc(prog_file);
429 if(ch=='#') {
430 ch=getc(prog_file);
431 if(ch=='n')
432 no_default_output++;
433 while(ch!=EOF && ch!='\n')
434 ch=getc(prog_file);
435 } else if(ch!=EOF)
436 ungetc(ch,prog_file);
437 the_program=compile_program(the_program);
438}
439
440#define MORE_CMDS 40
441
442/* Read a program (or a subprogram within '{' '}' pairs) in and store
443 the compiled form in *'vector' Return a pointer to the new vector. */
444struct vector *
445compile_program(vector)
446struct vector *vector;
447{
448 struct sed_cmd *cur_cmd;
449 int ch;
450 int slash;
451 VOID *b;
452 unsigned char *string;
453 int num;
454
455 if(!vector) {
456 vector=(struct vector *)ck_malloc(sizeof(struct vector));
457 vector->v=(struct sed_cmd *)ck_malloc(MORE_CMDS*sizeof(struct sed_cmd));
458 vector->v_allocated=MORE_CMDS;
459 vector->v_length=0;
460 vector->up_one = 0;
461 vector->next_one = 0;
462 }
463 for(;;) {
464 skip_comment:
465 do ch=inchar();
466 while(ch!=EOF && (isblank(ch) || ch=='\n' || ch==';'));
467 if(ch==EOF)
468 break;
469 savchar(ch);
470
471 if(vector->v_length==vector->v_allocated) {
472 vector->v=(struct sed_cmd *)ck_realloc((VOID *)vector->v,(vector->v_length+MORE_CMDS)*sizeof(struct sed_cmd));
473 vector->v_allocated+=MORE_CMDS;
474 }
475 cur_cmd=vector->v+vector->v_length;
476 vector->v_length++;
477
478 cur_cmd->a1.addr_type=0;
479 cur_cmd->a2.addr_type=0;
480 cur_cmd->aflags=0;
481 cur_cmd->cmd=0;
482
483 if(compile_address(&(cur_cmd->a1))) {
484 ch=inchar();
485 if(ch==',') {
486 do ch=inchar();
487 while(ch!=EOF && isblank(ch));
488 savchar(ch);
489 if(compile_address(&(cur_cmd->a2)))
490 ;
491 else
492 bad_prog("Unexpected ','");
493 } else
494 savchar(ch);
495 }
496 ch=inchar();
497 if(ch==EOF)
498 break;
499 new_cmd:
500 switch(ch) {
501 case '#':
502 if(cur_cmd->a1.addr_type!=0)
503 bad_prog(NO_ADDR);
504 do ch=inchar();
505 while(ch!=EOF && ch!='\n');
506 vector->v_length--;
507 goto skip_comment;
508 case '!':
509 if(cur_cmd->aflags & ADDR_BANG_BIT)
510 bad_prog("Multiple '!'s");
511 cur_cmd->aflags|= ADDR_BANG_BIT;
512 do ch=inchar();
513 while(ch!=EOF && isblank(ch));
514 if(ch==EOF)
515 bad_prog(BAD_EOF);
516#if 0
517 savchar(ch);
518#endif
519 goto new_cmd;
520 case 'a':
521 case 'i':
522 if(cur_cmd->a2.addr_type!=0)
523 bad_prog(ONE_ADDR);
524 /* Fall Through */
525 case 'c':
526 cur_cmd->cmd=ch;
527 if(inchar()!='\\' || inchar()!='\n')
528 bad_prog(LINE_JUNK);
529 b=init_buffer();
530 while((ch=inchar())!=EOF && ch!='\n') {
531 if(ch=='\\')
532 ch=inchar();
533 add1_buffer(b,ch);
534 }
535 if(ch!=EOF)
536 add1_buffer(b,ch);
537 num=size_buffer(b);
538 string=(unsigned char *)ck_malloc(num);
539 bcopy(get_buffer(b),string,num);
540 flush_buffer(b);
541 cur_cmd->x.cmd_txt.text_len=num;
542 cur_cmd->x.cmd_txt.text=(char *)string;
543 break;
544 case '{':
545 cur_cmd->cmd=ch;
546 program_depth++;
547#if 0
548 while((ch=inchar())!=EOF && ch!='\n')
549 if(!isblank(ch))
550 bad_prog(LINE_JUNK);
551#endif
552 cur_cmd->x.sub=compile_program((struct vector *)0);
553 /* FOO JF is this the right thing to do? */
554 break;
555 case '}':
556 if(!program_depth)
557 bad_prog("Unexpected '}'");
558 --(vector->v_length);
559 while((ch=inchar())!=EOF && ch!='\n' && ch!=';')
560 if(!isblank(ch))
561 bad_prog(LINE_JUNK);
562 return vector;
563 case ':':
564 cur_cmd->cmd=ch;
565 if(cur_cmd->a1.addr_type!=0)
566 bad_prog(": doesn't want any addresses");
567 labels=setup_jump(labels,cur_cmd,vector);
568 break;
569 case 'b':
570 case 't':
571 cur_cmd->cmd=ch;
572 jumps=setup_jump(jumps,cur_cmd,vector);
573 break;
574 case 'q':
575 case '=':
576 if(cur_cmd->a2.addr_type)
577 bad_prog(ONE_ADDR);
578 /* Fall Through */
579 case 'd':
580 case 'D':
581 case 'g':
582 case 'G':
583 case 'h':
584 case 'H':
585 case 'l':
586 case 'n':
587 case 'N':
588 case 'p':
589 case 'P':
590 case 'x':
591 cur_cmd->cmd=ch;
592 do ch=inchar();
593 while(ch!=EOF && isblank(ch) && ch!='\n' && ch!=';');
594 if(ch!='\n' && ch!=';' && ch!=EOF)
595 bad_prog(LINE_JUNK);
596 break;
597
598 case 'r':
599 if(cur_cmd->a2.addr_type!=0)
600 bad_prog(ONE_ADDR);
601 /* FALL THROUGH */
602 case 'w':
603 cur_cmd->cmd=ch;
604 cur_cmd->x.io_file=compile_filename(ch=='r');
605 break;
606
607 case 's':
608 cur_cmd->cmd=ch;
609 slash=inchar();
610 compile_regex(slash);
611
612 cur_cmd->x.cmd_regex.regx=last_regex;
613
614 b=init_buffer();
615 while((ch=inchar())!=EOF && ch!=slash) {
616 if(ch=='\\') {
617 int ci;
618
619 ci=inchar();
620 if(ci!=EOF) {
621 if(ci!='\n')
622 add1_buffer(b,ch);
623 add1_buffer(b,ci);
624 }
625 } else
626 add1_buffer(b,ch);
627 }
628 cur_cmd->x.cmd_regex.replace_length=size_buffer(b);
629 cur_cmd->x.cmd_regex.replacement=ck_malloc(cur_cmd->x.cmd_regex.replace_length);
630 bcopy(get_buffer(b),cur_cmd->x.cmd_regex.replacement,cur_cmd->x.cmd_regex.replace_length);
631 flush_buffer(b);
632
633 cur_cmd->x.cmd_regex.flags=0;
634 cur_cmd->x.cmd_regex.numb=0;
635
636 if(ch==EOF)
637 break;
638 do {
639 ch=inchar();
640 switch(ch) {
641 case 'p':
642 if(cur_cmd->x.cmd_regex.flags&S_PRINT_BIT)
643 bad_prog("multiple 'p' options to 's' command");
644 cur_cmd->x.cmd_regex.flags|=S_PRINT_BIT;
645 break;
646 case 'g':
647 if(cur_cmd->x.cmd_regex.flags&S_NUM_BIT)
648 cur_cmd->x.cmd_regex.flags&= ~S_NUM_BIT;
649 if(cur_cmd->x.cmd_regex.flags&S_GLOBAL_BIT)
650 bad_prog("multiple 'g' options to 's' command");
651 cur_cmd->x.cmd_regex.flags|=S_GLOBAL_BIT;
652 break;
653 case 'w':
654 cur_cmd->x.cmd_regex.flags|=S_WRITE_BIT;
655 cur_cmd->x.cmd_regex.wio_file=compile_filename(0);
656 ch='\n';
657 break;
658 case '0': case '1': case '2': case '3':
659 case '4': case '5': case '6': case '7':
660 case '8': case '9':
661 if(cur_cmd->x.cmd_regex.flags&S_NUM_BIT)
662 bad_prog("multiple number options to 's' command");
663 if((cur_cmd->x.cmd_regex.flags&S_GLOBAL_BIT)==0)
664 cur_cmd->x.cmd_regex.flags|=S_NUM_BIT;
665 num = 0;
666 while(isdigit(ch)) {
667 num=num*10+ch-'0';
668 ch=inchar();
669 }
670 savchar(ch);
671 cur_cmd->x.cmd_regex.numb=num;
672 break;
673 case '\n':
674 case ';':
675 case EOF:
676 break;
677 default:
678 bad_prog("Unknown option to 's'");
679 break;
680 }
681 } while(ch!=EOF && ch!='\n' && ch!=';');
682 if(ch==EOF)
683 break;
684 break;
685
686 case 'y':
687 cur_cmd->cmd=ch;
688 string=(unsigned char *)ck_malloc(256);
689 for(num=0;num<256;num++)
690 string[num]=num;
691 b=init_buffer();
692 slash=inchar();
693 while((ch=inchar())!=EOF && ch!=slash)
694 add1_buffer(b,ch);
695 cur_cmd->x.translate=string;
696 string=(unsigned char *)get_buffer(b);
697 for(num=size_buffer(b);num;--num) {
698 ch=inchar();
699 if(ch==EOF)
700 bad_prog(BAD_EOF);
701 if(ch==slash)
702 bad_prog("strings for y command are different lengths");
703 cur_cmd->x.translate[*string++]=ch;
704 }
705 flush_buffer(b);
706 if(inchar()!=slash || ((ch=inchar())!=EOF && ch!='\n' && ch!=';'))
707 bad_prog(LINE_JUNK);
708 break;
709
710 default:
711 bad_prog("Unknown command");
712 }
713 }
714 return vector;
715}
716
717/* Complain about a programming error and exit. */
718void
719bad_prog(why)
720char *why;
721{
722 if(prog_line)
723 fprintf(stderr,"%s: file %s line %d: %s\n",myname,prog_name,prog_line,why);
724 else
725 fprintf(stderr,"%s: %s\n",myname,why);
726 exit(1);
727}
728
729/* Read the next character from the program. Return EOF if there isn't
730 anything to read. Keep prog_line up to date, so error messages can
731 be meaningful. */
732int
733inchar()
734{
735 int ch;
736 if(prog_file) {
737 if(feof(prog_file))
738 return EOF;
739 else
740 ch=getc(prog_file);
741 } else {
742 if(!prog_cur)
743 return EOF;
744 else if(prog_cur==prog_end) {
745 ch=EOF;
746 prog_cur=0;
747 } else
748 ch= *prog_cur++;
749 }
750 if(ch=='\n' && prog_line)
751 prog_line++;
752 return ch;
753}
754
755/* unget 'ch' so the next call to inchar will return it. 'ch' must not be
756 EOF or anything nasty like that. */
757void
758savchar(ch)
759int ch;
760{
761 if(ch==EOF)
762 return;
763 if(ch=='\n' && prog_line>1)
764 --prog_line;
765 if(prog_file)
766 ungetc(ch,prog_file);
767 else
768 *--prog_cur=ch;
769}
770
771
772/* Try to read an address for a sed command. If it succeeeds,
773 return non-zero and store the resulting address in *'addr'.
774 If the input doesn't look like an address read nothing
775 and return zero. */
776int
777compile_address(addr)
778struct addr *addr;
779{
780 int ch;
781 int num;
782
783 ch=inchar();
784
785 if(isdigit(ch)) {
786 num=ch-'0';
787 while((ch=inchar())!=EOF && isdigit(ch))
788 num=num*10+ch-'0';
789 while(ch!=EOF && isblank(ch))
790 ch=inchar();
791 savchar(ch);
792 addr->addr_type=ADDR_NUM;
793 addr->addr_number = num;
794 return 1;
795 } else if(ch=='/') {
796 addr->addr_type=ADDR_REGEX;
797 compile_regex('/');
798 addr->addr_regex=last_regex;
799 do ch=inchar();
800 while(ch!=EOF && isblank(ch));
801 savchar(ch);
802 return 1;
803 } else if(ch=='$') {
804 addr->addr_type=ADDR_LAST;
805 do ch=inchar();
806 while(ch!=EOF && isblank(ch));
807 savchar(ch);
808 return 1;
809 } else
810 savchar(ch);
811 return 0;
812}
813
814void
815compile_regex (slash)
816 int slash;
817{
818 VOID *b;
819 int ch;
820 int in_char_class = 0;
821
822 b=init_buffer();
823 while((ch=inchar())!=EOF && (ch!=slash || in_char_class)) {
824 if(ch=='^') {
825 if(size_buffer(b)==0) {
826 add1_buffer(b,'\\');
827 add1_buffer(b,'`');
828 } else
829 add1_buffer(b,ch);
830 continue;
831 } else if(ch=='$') {
832 ch=inchar();
833 savchar(ch);
834 if(ch==slash) {
835 add1_buffer(b,'\\');
836 add1_buffer(b,'\'');
837 } else
838 add1_buffer(b,'$');
839 continue;
840 } else if(ch == '[') {
841 add1_buffer(b,ch);
842 in_char_class = 1;
843 continue;
844 } else if(ch == ']') {
845 add1_buffer(b,ch);
846 in_char_class = 0;
847 continue;
848 } else if(ch!='\\') {
849 add1_buffer(b,ch);
850 continue;
851 }
852 ch=inchar();
853 switch(ch) {
854 case 'n':
855 add1_buffer(b,'\n');
856 break;
857#if 0
858 case 'b':
859 add1_buffer(b,'\b');
860 break;
861 case 'f':
862 add1_buffer(b,'\f');
863 break;
864 case 'r':
865 add1_buffer(b,'\r');
866 break;
867 case 't':
868 add1_buffer(b,'\t');
869 break;
870#endif /* 0 */
871 case EOF:
872 break;
873 default:
874 add1_buffer(b,'\\');
875 add1_buffer(b,ch);
876 break;
877 }
878 }
879 if(ch==EOF)
880 bad_prog(BAD_EOF);
881 if(size_buffer(b)) {
882 last_regex=(struct re_pattern_buffer *)ck_malloc(sizeof(struct re_pattern_buffer));
883 last_regex->allocated=size_buffer(b)+10;
884 last_regex->buffer=ck_malloc(last_regex->allocated);
885 last_regex->fastmap=ck_malloc(256);
886 last_regex->translate=0;
887 re_compile_pattern(get_buffer(b),size_buffer(b),last_regex);
888 } else if(!last_regex)
889 bad_prog(NO_REGEX);
890 flush_buffer(b);
891}
892
893/* Store a label (or label reference) created by a ':', 'b', or 't'
894 comand so that the jump to/from the lable can be backpatched after
895 compilation is complete */
896struct sed_label *
897setup_jump(list,cmd,vec)
898struct sed_label *list;
899struct sed_cmd *cmd;
900struct vector *vec;
901{
902 struct sed_label *tmp;
903 VOID *b;
904 int ch;
905
906 b=init_buffer();
907 while((ch=inchar()) != EOF && isblank(ch))
908 ;
909 while(ch!=EOF && ch!='\n') {
910 add1_buffer(b,ch);
911 ch=inchar();
912 }
913 savchar(ch);
914 add1_buffer(b,'\0');
915 tmp=(struct sed_label *)ck_malloc(sizeof(struct sed_label));
916 tmp->v=vec;
917 tmp->v_index=cmd-vec->v;
918 tmp->name=ck_strdup(get_buffer(b));
919 tmp->next=list;
920 flush_buffer(b);
921 return tmp;
922}
923
924/* read in a filename for a 'r', 'w', or 's///w' command, and
925 update the internal structure about files. The file is
926 opened if it isn't already open. */
927FILE *
928compile_filename(readit)
929 int readit;
930{
931 char *file_name;
932 int n;
933 VOID *b;
934 int ch;
935
936 if(inchar()!=' ')
937 bad_prog("missing ' ' before filename");
938 b=init_buffer();
939 while((ch=inchar())!=EOF && ch!='\n')
940 add1_buffer(b,ch);
941 add1_buffer(b,'\0');
942 file_name=get_buffer(b);
943 for(n=0;n<NUM_FPS;n++) {
944 if(!file_ptrs[n].name)
945 break;
946 if(!strcmp(file_ptrs[n].name,file_name)) {
947 if(file_ptrs[n].readit!=readit)
948 bad_prog("Can't open file for both reading and writing");
949 flush_buffer(b);
950 return file_ptrs[n].phile;
951 }
952 }
953 if(n<NUM_FPS) {
954 file_ptrs[n].name=ck_strdup(file_name);
955 file_ptrs[n].readit=readit;
956 if (!readit)
957 file_ptrs[n].phile=ck_fopen(file_name,"a");
958 else if (access(file_name, 4) == 0)
959 file_ptrs[n].phile=ck_fopen(file_name,"r");
960 else
961 file_ptrs[n].phile=ck_fopen("/dev/null", "r");
962 flush_buffer(b);
963 return file_ptrs[n].phile;
964 } else {
965 bad_prog("Hopelessely evil compiled in limit on number of open files. re-compile sed");
966 return 0;
967 }
968}
969
970/* Parse a filename given by a 'r' 'w' or 's///w' command. */
971void
972read_file(name)
973char *name;
974{
975 if(*name=='-' && name[1]=='\0')
976 input_file=stdin;
977 else {
978 input_file=fopen(name,"r");
979 if(input_file==0) {
980 extern int errno;
981 extern char *sys_errlist[];
982 extern int sys_nerr;
983
984 char *ptr;
985
986 ptr=(errno>=0 && errno<sys_nerr) ? sys_errlist[errno] : "Unknown error code";
987 bad_input++;
988 fprintf(stderr,"%s: can't read %s: %s\n",myname,name,ptr);
989
990 return;
991 }
992 }
993 while(read_pattern_space()) {
994 execute_program(the_program);
995 if(!no_default_output)
996 ck_fwrite(line.text,1,line.length,stdout);
997 if(append.length) {
998 ck_fwrite(append.text,1,append.length,stdout);
999 append.length=0;
1000 }
1001 if(quit_cmd)
1002 break;
1003 }
1004 ck_fclose(input_file);
1005}
1006
1007/* Execute the program 'vec' on the current input line. */
1008void
1009execute_program(vec)
1010struct vector *vec;
1011{
1012 struct sed_cmd *cur_cmd;
1013 int n;
1014 int addr_matched;
1015 static int end_cycle;
1016
1017 int start;
1018 int remain;
1019 int offset;
1020
1021 static struct line tmp;
1022 struct line t;
1023 char *rep,*rep_end,*rep_next,*rep_cur;
1024
1025 struct re_registers regs;
1026 int count = 0;
1027
1028 end_cycle = 0;
1029
1030 for(cur_cmd=vec->v,n=vec->v_length;n;cur_cmd++,n--) {
1031
1032 exe_loop:
1033 addr_matched=0;
1034 if(cur_cmd->aflags&A1_MATCHED_BIT) {
1035 addr_matched=1;
1036 if(match_address(&(cur_cmd->a2)))
1037 cur_cmd->aflags&=~A1_MATCHED_BIT;
1038 } else if(match_address(&(cur_cmd->a1))) {
1039 addr_matched=1;
1040 if(cur_cmd->a2.addr_type!=ADDR_NULL)
1041 cur_cmd->aflags|=A1_MATCHED_BIT;
1042 }
1043 if(cur_cmd->aflags&ADDR_BANG_BIT)
1044 addr_matched= !addr_matched;
1045 if(!addr_matched)
1046 continue;
1047 switch(cur_cmd->cmd) {
1048 case '{': /* Execute sub-program */
1049 execute_program(cur_cmd->x.sub);
1050 break;
1051
1052 case ':': /* Executing labels is easy. */
1053 break;
1054
1055 case '=':
1056 printf("%d\n",input_line_number);
1057 break;
1058
1059 case 'a':
1060 while(append.alloc-append.length<cur_cmd->x.cmd_txt.text_len) {
1061 append.alloc *= 2;
1062 append.text=ck_realloc(append.text,append.alloc);
1063 }
1064 bcopy(cur_cmd->x.cmd_txt.text,append.text+append.length,cur_cmd->x.cmd_txt.text_len);
1065 append.length+=cur_cmd->x.cmd_txt.text_len;
1066 break;
1067
1068 case 'b':
1069 if(!cur_cmd->x.jump)
1070 end_cycle++;
1071 else {
1072 struct sed_label *j = cur_cmd->x.jump;
1073
1074 n= j->v->v_length - j->v_index;
1075 cur_cmd= j->v->v + j->v_index;
1076 goto exe_loop;
1077 }
1078 break;
1079
1080 case 'c':
1081 line.length=0;
1082 if(!(cur_cmd->aflags&A1_MATCHED_BIT))
1083 ck_fwrite(cur_cmd->x.cmd_txt.text,1,cur_cmd->x.cmd_txt.text_len,stdout);
1084 end_cycle++;
1085 break;
1086
1087 case 'd':
1088 line.length=0;
1089 end_cycle++;
1090 break;
1091
1092 case 'D':
1093 {
1094 char *tmp;
1095 int newlength;
1096
1097 tmp=memchr(line.text,'\n',line.length);
1098 newlength=line.length-(tmp-line.text);
1099 if(newlength)
1100 memmove(line.text,tmp,newlength);
1101 line.length=newlength;
1102 }
1103 end_cycle++;
1104 break;
1105
1106 case 'g':
1107 line_copy(&hold,&line);
1108 break;
1109
1110 case 'G':
1111 line_append(&hold,&line);
1112 break;
1113
1114 case 'h':
1115 line_copy(&line,&hold);
1116 break;
1117
1118 case 'H':
1119 line_append(&line,&hold);
1120 break;
1121
1122 case 'i':
1123 ck_fwrite(cur_cmd->x.cmd_txt.text,1,cur_cmd->x.cmd_txt.text_len,stdout);
1124 break;
1125
1126 case 'l':
1127 {
1128 char *tmp;
1129 int n;
1130 int width = 0;
1131
1132 n=line.length;
1133 tmp=line.text;
1134 /* Use --n so this'll skip the trailing newline */
1135 while(--n) {
1136 if(width>77) {
1137 width=0;
1138 putchar('\n');
1139 }
1140 if(*tmp == '\\') {
1141 printf("\\\\");
1142 width+=2;
1143 } else if(isprint(*tmp)) {
1144 putchar(*tmp);
1145 width++;
1146 } else switch(*tmp) {
1147#if 0
1148 /* Should print \00 instead of \0 because (a) POSIX requires it, and
1149 (b) this way \01 is unambiguous. */
1150 case '\0':
1151 printf("\\0");
1152 width+=2;
1153 break;
1154#endif
1155 case 007:
1156 printf("\\a");
1157 width+=2;
1158 break;
1159 case '\b':
1160 printf("\\b");
1161 width+=2;
1162 break;
1163 case '\f':
1164 printf("\\f");
1165 width+=2;
1166 break;
1167 case '\n':
1168 printf("\\n");
1169 width+=2;
1170 break;
1171 case '\r':
1172 printf("\\r");
1173 width+=2;
1174 break;
1175 case '\t':
1176 printf("\\t");
1177 width+=2;
1178 break;
1179 case '\v':
1180 printf("\\v");
1181 width+=2;
1182 break;
1183 default:
1184 printf("\\%02x",(*tmp)&0xFF);
1185 width+=2;
1186 break;
1187 }
1188 tmp++;
1189 }
1190 putchar('\n');
1191 }
1192 break;
1193
1194 case 'n':
1195 if (feof(input_file)) goto quit;
1196 ck_fwrite(line.text,1,line.length,stdout);
1197 read_pattern_space();
1198 break;
1199
1200 case 'N':
1201 if (feof(input_file)) goto quit;
1202 append_pattern_space();
1203 break;
1204
1205 case 'p':
1206 ck_fwrite(line.text,1,line.length,stdout);
1207 break;
1208
1209 case 'P':
1210 {
1211 char *tmp;
1212
1213 tmp=memchr(line.text,'\n',line.length);
1214 ck_fwrite(line.text, 1,
1215 tmp ? tmp - line.text + 1
1216 : line.length, stdout);
1217 }
1218 break;
1219
1220 case 'q': quit:
1221 quit_cmd++;
1222 end_cycle++;
1223 break;
1224
1225 case 'r':
1226 {
1227 int n = 0;
1228
1229 rewind(cur_cmd->x.io_file);
1230 do {
1231 append.length += n;
1232 if(append.length==append.alloc) {
1233 append.alloc *= 2;
1234 append.text = ck_realloc(append.text, append.alloc);
1235 }
1236 } while((n=fread(append.text+append.length,sizeof(char),append.alloc-append.length,cur_cmd->x.io_file))>0);
1237 if(ferror(cur_cmd->x.io_file))
1238 panic("Read error on input file to 'r' command");
1239 }
1240 break;
1241
1242 case 's':
1243 if(!tmp.alloc) {
1244 tmp.alloc=50;
1245 tmp.text=ck_malloc(50);
1246 }
1247 count=0;
1248 start = 0;
1249 remain=line.length-1;
1250 tmp.length=0;
1251 rep = cur_cmd->x.cmd_regex.replacement;
1252 rep_end=rep+cur_cmd->x.cmd_regex.replace_length;
1253
1254 while((offset = re_search(cur_cmd->x.cmd_regex.regx,
1255 line.text,
1256 line.length-1,
1257 start,
1258 remain,
1259 &regs))>=0) {
1260 count++;
1261 if(offset-start)
1262 str_append(&tmp,line.text+start,offset-start);
1263
1264 if(cur_cmd->x.cmd_regex.flags&S_NUM_BIT) {
1265 if(count!=cur_cmd->x.cmd_regex.numb) {
1266 str_append(&tmp,line.text+regs.start[0],regs.end[0]-regs.start[0]);
1267 start = (offset == regs.end[0] ? offset + 1 : regs.end[0]);
1268 remain = (line.length-1) - start;
1269 continue;
1270 }
1271 }
1272
1273 for(rep_next=rep_cur=rep;rep_next<rep_end;rep_next++) {
1274 if(*rep_next=='&') {
1275 if(rep_next-rep_cur)
1276 str_append(&tmp,rep_cur,rep_next-rep_cur);
1277 str_append(&tmp,line.text+regs.start[0],regs.end[0]-regs.start[0]);
1278 rep_cur=rep_next+1;
1279 } else if(*rep_next=='\\') {
1280 if(rep_next-rep_cur)
1281 str_append(&tmp,rep_cur,rep_next-rep_cur);
1282 rep_next++;
1283 if(rep_next!=rep_end) {
1284 int n;
1285
1286 if(*rep_next>='0' && *rep_next<='9') {
1287 n= *rep_next -'0';
1288 str_append(&tmp,line.text+regs.start[n],regs.end[n]-regs.start[n]);
1289 } else
1290 str_append(&tmp,rep_next,1);
1291 }
1292 rep_cur=rep_next+1;
1293 }
1294 }
1295 if(rep_next-rep_cur)
1296 str_append(&tmp,rep_cur,rep_next-rep_cur);
1297 if (offset == regs.end[0]) {
1298 str_append(&tmp, line.text + offset, 1);
1299 ++regs.end[0];
1300 }
1301 start = regs.end[0];
1302
1303 remain = (line.length-1) - start;
1304 if(remain<0)
1305 break;
1306 if(!(cur_cmd->x.cmd_regex.flags&S_GLOBAL_BIT))
1307 break;
1308 }
1309 if(!count)
1310 break;
1311 replaced=1;
1312 str_append(&tmp,line.text+start,remain+1);
1313 t.text=line.text;
1314 t.length=line.length;
1315 t.alloc=line.alloc;
1316 line.text=tmp.text;
1317 line.length=tmp.length;
1318 line.alloc=tmp.alloc;
1319 tmp.text=t.text;
1320 tmp.length=t.length;
1321 tmp.alloc=t.alloc;
1322 if(cur_cmd->x.cmd_regex.flags&S_WRITE_BIT)
1323 ck_fwrite(line.text,1,line.length,cur_cmd->x.cmd_regex.wio_file);
1324 if(cur_cmd->x.cmd_regex.flags&S_PRINT_BIT)
1325 ck_fwrite(line.text,1,line.length,stdout);
1326 break;
1327
1328 case 't':
1329 if(replaced) {
1330 replaced = 0;
1331 if(!cur_cmd->x.jump)
1332 end_cycle++;
1333 else {
1334 struct sed_label *j = cur_cmd->x.jump;
1335
1336 n= j->v->v_length - j->v_index;
1337 cur_cmd= j->v->v + j->v_index;
1338 goto exe_loop;
1339 }
1340 }
1341 break;
1342
1343 case 'w':
1344 ck_fwrite(line.text,1,line.length,cur_cmd->x.io_file);
1345 break;
1346
1347 case 'x':
1348 {
1349 struct line tmp;
1350
1351 tmp=line;
1352 line=hold;
1353 hold=tmp;
1354 }
1355 break;
1356
1357 case 'y':
1358 {
1359 unsigned char *p,*e;
1360
1361 for(p=(unsigned char *)(line.text),e=p+line.length;p<e;p++)
1362 *p=cur_cmd->x.translate[*p];
1363 }
1364 break;
1365
1366 default:
1367 panic("INTERNAL ERROR: Bad cmd %c",cur_cmd->cmd);
1368 }
1369 if(end_cycle)
1370 break;
1371 }
1372}
1373
1374
1375/* Return non-zero if the current line matches the address
1376 pointed to by 'addr'. */
1377int
1378match_address(addr)
1379struct addr *addr;
1380{
1381 switch(addr->addr_type) {
1382 case ADDR_NULL:
1383 return 1;
1384 case ADDR_NUM:
1385 return (input_line_number==addr->addr_number);
1386
1387 case ADDR_REGEX:
1388 return (re_search(addr->addr_regex,
1389 line.text,
1390 line.length-1,
1391 0,
1392 line.length-1,
1393 (struct re_registers *)0)>=0) ? 1 : 0;
1394
1395 case ADDR_LAST:
1396 return (input_EOF) ? 1 : 0;
1397
1398 default:
1399 panic("INTERNAL ERROR: bad address type");
1400 break;
1401 }
1402 return -1;
1403}
1404
1405/* Read in the next line of input, and store it in the
1406 pattern space. Return non-zero if this is the last line of input */
1407
1408int
1409read_pattern_space()
1410{
1411 int n;
1412 char *p;
1413 int ch;
1414
1415 p=line.text;
1416 n=line.alloc;
1417
1418 if(feof(input_file))
1419 return 0;
1420 input_line_number++;
1421 replaced=0;
1422 for(;;) {
1423 if(n==0) {
1424 line.text=ck_realloc(line.text,line.alloc*2);
1425 p=line.text+line.alloc;
1426 n=line.alloc;
1427 line.alloc*=2;
1428 }
1429 ch=getc(input_file);
1430 if(ch==EOF) {
1431 if(n==line.alloc)
1432 return 0;
1433 *p++='\n';
1434 --n;
1435 line.length=line.alloc-n;
1436 if(last_input_file)
1437 input_EOF++;
1438 return 1;
1439 }
1440 *p++=ch;
1441 --n;
1442 if(ch=='\n') {
1443 line.length=line.alloc-n;
1444 break;
1445 }
1446 }
1447 ch=getc(input_file);
1448 if(ch!=EOF)
1449 ungetc(ch,input_file);
1450 else if(last_input_file)
1451 input_EOF++;
1452 return 1;
1453}
1454
1455/* Inplement the 'N' command, which appends the next line of input to
1456 the pattern space. */
1457void
1458append_pattern_space()
1459{
1460 char *p;
1461 int n;
1462 int ch;
1463
1464 p=line.text+line.length;
1465 n=line.alloc-line.length;
1466
1467 input_line_number++;
1468 replaced=0;
1469 for(;;) {
1470 ch=getc(input_file);
1471 if(ch==EOF) {
1472 if(n==line.alloc)
1473 return;
1474 *p++='\n';
1475 --n;
1476 line.length=line.alloc-n;
1477 if(last_input_file)
1478 input_EOF++;
1479 return;
1480 }
1481 *p++=ch;
1482 --n;
1483 if(ch=='\n') {
1484 line.length=line.alloc-n;
1485 break;
1486 }
1487 if(n==0) {
1488 line.text=ck_realloc(line.text,line.alloc*2);
1489 p=line.text+line.alloc;
1490 n=line.alloc;
1491 line.alloc*=2;
1492 }
1493 }
1494 ch=getc(input_file);
1495 if(ch!=EOF)
1496 ungetc(ch,input_file);
1497 else if(last_input_file)
1498 input_EOF++;
1499}
1500
1501/* Copy the contents of the line 'from' into the line 'to'.
1502 This destroys the old contents of 'to'. It will still work
1503 if the line 'from' contains nulls. */
1504void
1505line_copy(from,to)
1506struct line *from,*to;
1507{
1508 if(from->length>to->alloc) {
1509 to->alloc=from->length;
1510 to->text=ck_realloc(to->text,to->alloc);
1511 }
1512 bcopy(from->text,to->text,from->length);
1513 to->length=from->length;
1514}
1515
1516/* Append the contents of the line 'from' to the line 'to'.
1517 This routine will work even if the line 'from' contains nulls */
1518void
1519line_append(from,to)
1520struct line *from,*to;
1521{
1522 if(from->length>(to->alloc-to->length)) {
1523 to->alloc+=from->length;
1524 to->text=ck_realloc(to->text,to->alloc);
1525 }
1526 bcopy(from->text,to->text+to->length,from->length);
1527 to->length+=from->length;
1528}
1529
1530/* Append 'length' bytes from 'string' to the line 'to'
1531 This routine *will* append bytes with nulls in them, without
1532 failing. */
1533void
1534str_append(to,string,length)
1535struct line *to;
1536char *string;
1537int length;
1538{
1539 if(length>to->alloc-to->length) {
1540 to->alloc+=length;
1541 to->text=ck_realloc(to->text,to->alloc);
1542 }
1543 bcopy(string,to->text+to->length,length);
1544 to->length+=length;
1545}
1546
/* Print a two-line usage summary on stderr, with 'myname' substituted
   for the program name, then terminate the process with exit status 4.
   This function does not return. */
1547void
1548usage()
1549{
1550 fprintf(stderr, "\
1551Usage: %s [-nV] [+quiet] [+silent] [+version] [-e script] [-f script-file]\n\
1552 [+expression=script] [+file=script-file] [file...]\n", myname);
1553 exit(4);
1554}