/* GNU SED, a batch stream editor.
Copyright (C) 1989-1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* True when C is a space or a horizontal tab.  The argument can be
   evaluated twice, so it must be free of side effects. */
#define isblank(c) (((c) == ' ') || ((c) == '\t'))
#if defined(STDC_HEADERS)
#if defined(USG) || defined(STDC_HEADERS)
/* Emulate BSD bcopy(src, dst, len) on systems that lack it.  Note the
   argument order: source first, destination second.  bcopy is specified
   to copy correctly even when the two regions overlap, so it is mapped
   onto memmove rather than memcpy, whose behaviour is undefined when
   the regions overlap. */
#define bcopy(s, d, n) (memmove((d), (s), (n)))
/* Program name and version number as a single printable string. */
char *version_string = "GNU sed version 1.08";
/* Struct vector is used to describe a chunk of a sed program. There is one
vector for the main program, and one for each { } pair. */
/* Goto structure is used to hold both GOTO's and labels. There are two
separate lists, one of goto's, called 'jumps', and one of labels, called
the V element points to the descriptor for the program-chunk in which the
the v_index element counts which element of the vector actually IS the
goto/label. The first element of the vector is zero.
the NAME element is the null-terminated name of the label.
next is the next goto/label in the list. */
/* ADDR_TYPE is zero for a null address,
one if addr_number is valid, or
two if addr_regex is valid,
three, if the address is '$'
Other values are undefined.
struct re_pattern_buffer
*addr_regex
;
/* Aflags: If the low order bit is set, a1 has been
matched; apply this command until a2 matches.
If the next bit is set, apply this command to all
lines that DON'T match the address(es).
#define A1_MATCHED_BIT 01
/* This structure is used for a, i, and c commands */
/* This is used for b and t commands */
/* This for r and w commands */
/* This for the hairy s command */
low order bit means the 'g' option was given,
next bit means the 'p' option was given,
and the next bit means a 'w' option was given,
and wio_file contains the file to write to. */
struct re_pattern_buffer
*regx
;
/* This for the y command */
unsigned char *translate
;
/* Sed operates a line at a time. */
char *text
; /* Pointer to line allocated by malloc. */
int length
; /* Length of text. */
int alloc
; /* Allocated space for text. */
/* This structure holds information about files opened by the 'r', 'w',
and 's///w' commands. In particular, it holds the FILE pointer to
use, the file's name, and a flag that is non-zero if the file is being
read instead of written. */
/* Prototypes for panic() and the ck_* helpers.  Going by their names the
   ck_* routines are checked wrappers around stdio/malloc; their
   definitions are not visible in this part of the file.  P_ wraps each
   parameter list. */
void panic P_((char *str, ...));
char *__fp_name P_((FILE *fp));
FILE *ck_fopen P_((char *name, char *mode));
void ck_fwrite P_((char *ptr, int size, int nmemb, FILE *stream));
void ck_fclose P_((FILE *stream));
VOID *ck_malloc P_((int size));
VOID *ck_realloc P_((VOID *ptr, int size));
char *ck_strdup P_((char *str));
/* Prototypes for the *_buffer helpers.  A buffer is handed around as an
   opaque VOID pointer obtained from init_buffer(). */
VOID *init_buffer P_((void));
void flush_buffer P_((VOID *bb));
int size_buffer P_((VOID *b));
void add_buffer P_((VOID *bb, char *p, int n));
void add1_buffer P_((VOID *bb, int ch));
char *get_buffer P_((VOID *bb));
/* Forward declarations for the script-compiling, program-executing and
   line-handling routines. */
void compile_string P_((char *str));
void compile_file P_((char *str));
struct vector *compile_program P_((struct vector *vector));
void bad_prog P_((char *why));
void savchar P_((int ch));
int compile_address P_((struct addr *addr));
void compile_regex P_((int slash));
struct sed_label *setup_jump P_((struct sed_label *list, struct sed_cmd *cmd, struct vector *vec));
FILE *compile_filename P_((int readit));
void read_file P_((char *name));
void execute_program P_((struct vector *vec));
int match_address P_((struct addr *addr));
int read_pattern_space P_((void));
void append_pattern_space P_((void));
void line_copy P_((struct line *from, struct line *to));
void line_append P_((struct line *from, struct line *to));
void str_append P_((struct line *to, char *string, int length));
/* If set, don't write out the line unless explicitly told to */
int no_default_output = 0;

/* Current input line # */
int input_line_number = 0;
/* Are we on the last input file? */
/* Have we hit EOF on the last input file? This is used to decide if we
have hit the '$' address yet. */
/* non-zero if a quit command has been executed. */
/* Have we done any replacements lately? This is used by the 't' command. */
/* How many '{'s are we executing at the moment */
/* The complete compiled SED program that we are going to run.
   Starts out empty. */
struct vector *the_program = NULL;
/* Information about labels and jumps-to-labels. This is used to do
the required backpatching after we have compiled all the scripts.
Both lists start out empty. */
struct sed_label *jumps = NULL;
struct sed_label *labels = NULL;
/* The 'current' input line. */
/* An input line that's been stored for later use by the program */
/* A 'line' to append to the current line when it comes time to write it out */
/* When we're reading a script command from a string, 'prog_start' and
'prog_end' point to the beginning and end of the string. This
would allow us to compile script strings that contain nulls, except
that script strings are only read from the command line, which is
/* When we're reading a script command from a string, 'prog_cur' points
to the current character in the string */
/* This is the name of the current script file.
It is used for error messages. */
/* This is the current script file. If it is zero, we are reading
from a string stored in 'prog_start' instead. If both 'prog_file'
and 'prog_start' are zero, we're in trouble! */
/* this is the number of the current script line that we're compiling. It is
used to give out useful and informative error messages. */
/* This is the file pointer that we're currently reading data from. It may
/* If this variable is non-zero at exit, one or more of the input
files couldn't be opened. */
/* 'An empty regular expression is equivalent to the last regular
expression read', so we have to keep track of the last regex used.
This pointer is where we store it (it is only malloc()'d once). */
struct re_pattern_buffer *last_regex;
/* Canned diagnostics for script errors. */
static char ONE_ADDR[] = "Command only uses one address";
static char NO_ADDR[] = "Command doesn't take any addresses";
static char LINE_JUNK[] = "Extra characters after command";
static char BAD_EOF[] = "Unexpected End-of-file";
static char NO_REGEX[] = "No previous regular expression";
static struct option longopts
[] =
{"expression", 1, NULL
, 'e'},
{"silent", 0, NULL
, 'n'},
{"version", 0, NULL
, 'V'},
/* Yes, the main program, which parses arguments, and does the right
thing with them; it also inits the temporary storage, etc. */
struct sed_label
*go
,*lbl
;
while((opt
=getopt_long(argc
,argv
,"ne:f:V", longopts
, (int *) 0))
e_strings
=ck_malloc(strlen(optarg
)+2);
strcpy(e_strings
,optarg
);
e_strings
=ck_realloc(e_strings
,strlen(e_strings
)+strlen(optarg
)+2);
strcat(e_strings
,optarg
);
fprintf(stderr
, "%s\n", version_string
);
compile_string(e_strings
);
compile_string(argv
[optind
++]);
for(go
=jumps
;go
;go
=go
->next
) {
for(lbl
=labels
;lbl
;lbl
=lbl
->next
)
if(!strcmp(lbl
->name
,go
->name
))
panic("Can't find label for jump to '%s'",go
->name
);
go
->v
->v
[go
->v_index
].x
.jump
=lbl
;
append
.text
=ck_malloc(50);
} else while(optind
<argc
) {
/* 'str' is a string (from the command line) that contains a sed command.
Compile the command, and add it to the end of 'the_program' */
prog_end
=str
+strlen(str
);
the_program
=compile_program(the_program
);
/* 'str' is the name of a file containing sed commands. Read them in
and add them to the end of 'the_program' */
prog_start
=prog_cur
=prog_end
=0;
if(str
[0]=='-' && str
[1]=='\0')
prog_file
=ck_fopen(str
,"r");
while(ch
!=EOF
&& ch
!='\n')
the_program
=compile_program(the_program
);
/* Read a program (or a subprogram within '{' '}' pairs) in and store
the compiled form in *'vector' Return a pointer to the new vector. */
vector
=(struct vector
*)ck_malloc(sizeof(struct vector
));
vector
->v
=(struct sed_cmd
*)ck_malloc(MORE_CMDS
*sizeof(struct sed_cmd
));
vector
->v_allocated
=MORE_CMDS
;
while(ch
!=EOF
&& (isblank(ch
) || ch
=='\n' || ch
==';'));
if(vector
->v_length
==vector
->v_allocated
) {
vector
->v
=(struct sed_cmd
*)ck_realloc((VOID
*)vector
->v
,(vector
->v_length
+MORE_CMDS
)*sizeof(struct sed_cmd
));
vector
->v_allocated
+=MORE_CMDS
;
cur_cmd
=vector
->v
+vector
->v_length
;
if(compile_address(&(cur_cmd
->a1
))) {
while(ch
!=EOF
&& isblank(ch
));
if(compile_address(&(cur_cmd
->a2
)))
bad_prog("Unexpected ','");
if(cur_cmd
->a1
.addr_type
!=0)
while(ch
!=EOF
&& ch
!='\n');
if(cur_cmd
->aflags
& ADDR_BANG_BIT
)
bad_prog("Multiple '!'s");
cur_cmd
->aflags
|= ADDR_BANG_BIT
;
while(ch
!=EOF
&& isblank(ch
));
if(cur_cmd
->a2
.addr_type
!=0)
if(inchar()!='\\' || inchar()!='\n')
while((ch
=inchar())!=EOF
&& ch
!='\n') {
string
=(unsigned char *)ck_malloc(num
);
bcopy(get_buffer(b
),string
,num
);
cur_cmd
->x
.cmd_txt
.text_len
=num
;
cur_cmd
->x
.cmd_txt
.text
=(char *)string
;
while((ch
=inchar())!=EOF
&& ch
!='\n')
cur_cmd
->x
.sub
=compile_program((struct vector
*)0);
/* FOO JF is this the right thing to do? */
bad_prog("Unexpected '}'");
while((ch
=inchar())!=EOF
&& ch
!='\n' && ch
!=';')
if(cur_cmd
->a1
.addr_type
!=0)
bad_prog(": doesn't want any addresses");
labels
=setup_jump(labels
,cur_cmd
,vector
);
jumps
=setup_jump(jumps
,cur_cmd
,vector
);
if(cur_cmd
->a2
.addr_type
)
while(ch
!=EOF
&& isblank(ch
) && ch
!='\n' && ch
!=';');
if(ch
!='\n' && ch
!=';' && ch
!=EOF
)
if(cur_cmd
->a2
.addr_type
!=0)
cur_cmd
->x
.io_file
=compile_filename(ch
=='r');
cur_cmd
->x
.cmd_regex
.regx
=last_regex
;
while((ch
=inchar())!=EOF
&& ch
!=slash
) {
cur_cmd
->x
.cmd_regex
.replace_length
=size_buffer(b
);
cur_cmd
->x
.cmd_regex
.replacement
=ck_malloc(cur_cmd
->x
.cmd_regex
.replace_length
);
bcopy(get_buffer(b
),cur_cmd
->x
.cmd_regex
.replacement
,cur_cmd
->x
.cmd_regex
.replace_length
);
cur_cmd
->x
.cmd_regex
.flags
=0;
cur_cmd
->x
.cmd_regex
.numb
=0;
if(cur_cmd
->x
.cmd_regex
.flags
&S_PRINT_BIT
)
bad_prog("multiple 'p' options to 's' command");
cur_cmd
->x
.cmd_regex
.flags
|=S_PRINT_BIT
;
if(cur_cmd
->x
.cmd_regex
.flags
&S_NUM_BIT
)
cur_cmd
->x
.cmd_regex
.flags
&= ~S_NUM_BIT
;
if(cur_cmd
->x
.cmd_regex
.flags
&S_GLOBAL_BIT
)
bad_prog("multiple 'g' options to 's' command");
cur_cmd
->x
.cmd_regex
.flags
|=S_GLOBAL_BIT
;
cur_cmd
->x
.cmd_regex
.flags
|=S_WRITE_BIT
;
cur_cmd
->x
.cmd_regex
.wio_file
=compile_filename(0);
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
if(cur_cmd
->x
.cmd_regex
.flags
&S_NUM_BIT
)
bad_prog("multiple number options to 's' command");
if((cur_cmd
->x
.cmd_regex
.flags
&S_GLOBAL_BIT
)==0)
cur_cmd
->x
.cmd_regex
.flags
|=S_NUM_BIT
;
cur_cmd
->x
.cmd_regex
.numb
=num
;
bad_prog("Unknown option to 's'");
} while(ch
!=EOF
&& ch
!='\n' && ch
!=';');
string
=(unsigned char *)ck_malloc(256);
while((ch
=inchar())!=EOF
&& ch
!=slash
)
cur_cmd
->x
.translate
=string
;
string
=(unsigned char *)get_buffer(b
);
for(num
=size_buffer(b
);num
;--num
) {
bad_prog("strings for y command are different lengths");
cur_cmd
->x
.translate
[*string
++]=ch
;
if(inchar()!=slash
|| ((ch
=inchar())!=EOF
&& ch
!='\n' && ch
!=';'))
bad_prog("Unknown command");
/* Complain about a programming error and exit. */
fprintf(stderr
,"%s: file %s line %d: %s\n",myname
,prog_name
,prog_line
,why
);
fprintf(stderr
,"%s: %s\n",myname
,why
);
/* Read the next character from the program. Return EOF if there isn't
anything to read. Keep prog_line up to date, so error messages can
else if(prog_cur
==prog_end
) {
if(ch
=='\n' && prog_line
)
/* unget 'ch' so the next call to inchar will return it. 'ch' must not be
EOF or anything nasty like that. */
if(ch
=='\n' && prog_line
>1)
/* Try to read an address for a sed command. If it succeeds,
return non-zero and store the resulting address in *'addr'.
If the input doesn't look like an address read nothing
while((ch
=inchar())!=EOF
&& isdigit(ch
))
while(ch
!=EOF
&& isblank(ch
))
addr
->addr_type
=ADDR_NUM
;
addr
->addr_type
=ADDR_REGEX
;
addr
->addr_regex
=last_regex
;
while(ch
!=EOF
&& isblank(ch
));
addr
->addr_type
=ADDR_LAST
;
while(ch
!=EOF
&& isblank(ch
));
while((ch
=inchar())!=EOF
&& (ch
!=slash
|| in_char_class
)) {
last_regex
=(struct re_pattern_buffer
*)ck_malloc(sizeof(struct re_pattern_buffer
));
last_regex
->allocated
=size_buffer(b
)+10;
last_regex
->buffer
=ck_malloc(last_regex
->allocated
);
last_regex
->fastmap
=ck_malloc(256);
re_compile_pattern(get_buffer(b
),size_buffer(b
),last_regex
);
/* Store a label (or label reference) created by a ':', 'b', or 't'
command so that the jump to/from the label can be backpatched after
compilation is complete */
while((ch
=inchar()) != EOF
&& isblank(ch
))
while(ch
!=EOF
&& ch
!='\n') {
tmp
=(struct sed_label
*)ck_malloc(sizeof(struct sed_label
));
tmp
->name
=ck_strdup(get_buffer(b
));
/* read in a filename for a 'r', 'w', or 's///w' command, and
update the internal structure about files. The file is
opened if it isn't already open. */
bad_prog("missing ' ' before filename");
while((ch
=inchar())!=EOF
&& ch
!='\n')
if(!strcmp(file_ptrs
[n
].name
,file_name
)) {
if(file_ptrs
[n
].readit
!=readit
)
bad_prog("Can't open file for both reading and writing");
return file_ptrs
[n
].phile
;
file_ptrs
[n
].name
=ck_strdup(file_name
);
file_ptrs
[n
].readit
=readit
;
file_ptrs
[n
].phile
=ck_fopen(file_name
,"a");
else if (access(file_name
, 4) == 0)
file_ptrs
[n
].phile
=ck_fopen(file_name
,"r");
file_ptrs
[n
].phile
=ck_fopen("/dev/null", "r");
return file_ptrs
[n
].phile
;
bad_prog("Hopelessely evil compiled in limit on number of open files. re-compile sed");
/* Parse a filename given by a 'r' 'w' or 's///w' command. */
if(*name
=='-' && name
[1]=='\0')
input_file
=fopen(name
,"r");
extern char *sys_errlist
[];
ptr
=(errno
>=0 && errno
<sys_nerr
) ? sys_errlist
[errno
] : "Unknown error code";
fprintf(stderr
,"%s: can't read %s: %s\n",myname
,name
,ptr
);
while(read_pattern_space()) {
execute_program(the_program
);
ck_fwrite(line
.text
,1,line
.length
,stdout
);
ck_fwrite(append
.text
,1,append
.length
,stdout
);
/* Execute the program 'vec' on the current input line. */
char *rep
,*rep_end
,*rep_next
,*rep_cur
;
struct re_registers regs
;
for(cur_cmd
=vec
->v
,n
=vec
->v_length
;n
;cur_cmd
++,n
--) {
if(cur_cmd
->aflags
&A1_MATCHED_BIT
) {
if(match_address(&(cur_cmd
->a2
)))
cur_cmd
->aflags
&=~A1_MATCHED_BIT
;
} else if(match_address(&(cur_cmd
->a1
))) {
if(cur_cmd
->a2
.addr_type
!=ADDR_NULL
)
cur_cmd
->aflags
|=A1_MATCHED_BIT
;
if(cur_cmd
->aflags
&ADDR_BANG_BIT
)
addr_matched
= !addr_matched
;
case '{': /* Execute sub-program */
execute_program(cur_cmd
->x
.sub
);
case ':': /* Executing labels is easy. */
printf("%d\n",input_line_number
);
while(append
.alloc
-append
.length
<cur_cmd
->x
.cmd_txt
.text_len
) {
append
.text
=ck_realloc(append
.text
,append
.alloc
);
bcopy(cur_cmd
->x
.cmd_txt
.text
,append
.text
+append
.length
,cur_cmd
->x
.cmd_txt
.text_len
);
append
.length
+=cur_cmd
->x
.cmd_txt
.text_len
;
struct sed_label
*j
= cur_cmd
->x
.jump
;
n
= j
->v
->v_length
- j
->v_index
;
cur_cmd
= j
->v
->v
+ j
->v_index
;
if(!(cur_cmd
->aflags
&A1_MATCHED_BIT
))
ck_fwrite(cur_cmd
->x
.cmd_txt
.text
,1,cur_cmd
->x
.cmd_txt
.text_len
,stdout
);
tmp
=memchr(line
.text
,'\n',line
.length
);
newlength
=line
.length
-(tmp
-line
.text
);
memmove(line
.text
,tmp
,newlength
);
line_append(&hold
,&line
);
line_append(&line
,&hold
);
ck_fwrite(cur_cmd
->x
.cmd_txt
.text
,1,cur_cmd
->x
.cmd_txt
.text_len
,stdout
);
/* Use --n so this'll skip the trailing newline */
} else if(isprint(*tmp
)) {
/* Should print \00 instead of \0 because (a) POSIX requires it, and
(b) this way \01 is unambiguous. */
printf("\\%02x",(*tmp
)&0xFF);
if (feof(input_file
)) goto quit
;
ck_fwrite(line
.text
,1,line
.length
,stdout
);
if (feof(input_file
)) goto quit
;
ck_fwrite(line
.text
,1,line
.length
,stdout
);
tmp
=memchr(line
.text
,'\n',line
.length
);
tmp
? tmp
- line
.text
+ 1
rewind(cur_cmd
->x
.io_file
);
if(append
.length
==append
.alloc
) {
append
.text
= ck_realloc(append
.text
, append
.alloc
);
} while((n
=fread(append
.text
+append
.length
,sizeof(char),append
.alloc
-append
.length
,cur_cmd
->x
.io_file
))>0);
if(ferror(cur_cmd
->x
.io_file
))
panic("Read error on input file to 'r' command");
rep
= cur_cmd
->x
.cmd_regex
.replacement
;
rep_end
=rep
+cur_cmd
->x
.cmd_regex
.replace_length
;
while((offset
= re_search(cur_cmd
->x
.cmd_regex
.regx
,
str_append(&tmp
,line
.text
+start
,offset
-start
);
if(cur_cmd
->x
.cmd_regex
.flags
&S_NUM_BIT
) {
if(count
!=cur_cmd
->x
.cmd_regex
.numb
) {
str_append(&tmp
,line
.text
+regs
.start
[0],regs
.end
[0]-regs
.start
[0]);
start
= (offset
== regs
.end
[0] ? offset
+ 1 : regs
.end
[0]);
remain
= (line
.length
-1) - start
;
for(rep_next
=rep_cur
=rep
;rep_next
<rep_end
;rep_next
++) {
str_append(&tmp
,rep_cur
,rep_next
-rep_cur
);
str_append(&tmp
,line
.text
+regs
.start
[0],regs
.end
[0]-regs
.start
[0]);
} else if(*rep_next
=='\\') {
str_append(&tmp
,rep_cur
,rep_next
-rep_cur
);
if(*rep_next
>='0' && *rep_next
<='9') {
str_append(&tmp
,line
.text
+regs
.start
[n
],regs
.end
[n
]-regs
.start
[n
]);
str_append(&tmp
,rep_next
,1);
str_append(&tmp
,rep_cur
,rep_next
-rep_cur
);
if (offset
== regs
.end
[0]) {
str_append(&tmp
, line
.text
+ offset
, 1);
remain
= (line
.length
-1) - start
;
if(!(cur_cmd
->x
.cmd_regex
.flags
&S_GLOBAL_BIT
))
str_append(&tmp
,line
.text
+start
,remain
+1);
if(cur_cmd
->x
.cmd_regex
.flags
&S_WRITE_BIT
)
ck_fwrite(line
.text
,1,line
.length
,cur_cmd
->x
.cmd_regex
.wio_file
);
if(cur_cmd
->x
.cmd_regex
.flags
&S_PRINT_BIT
)
ck_fwrite(line
.text
,1,line
.length
,stdout
);
struct sed_label
*j
= cur_cmd
->x
.jump
;
n
= j
->v
->v_length
- j
->v_index
;
cur_cmd
= j
->v
->v
+ j
->v_index
;
ck_fwrite(line
.text
,1,line
.length
,cur_cmd
->x
.io_file
);
for(p
=(unsigned char *)(line
.text
),e
=p
+line
.length
;p
<e
;p
++)
*p
=cur_cmd
->x
.translate
[*p
];
panic("INTERNAL ERROR: Bad cmd %c",cur_cmd
->cmd
);
/* Return non-zero if the current line matches the address
switch(addr
->addr_type
) {
return (input_line_number
==addr
->addr_number
);
return (re_search(addr
->addr_regex
,
(struct re_registers
*)0)>=0) ? 1 : 0;
return (input_EOF
) ? 1 : 0;
panic("INTERNAL ERROR: bad address type");
/* Read in the next line of input, and store it in the
pattern space. Return non-zero if this is the last line of input */
line
.text
=ck_realloc(line
.text
,line
.alloc
*2);
line
.length
=line
.alloc
-n
;
line
.length
=line
.alloc
-n
;
/* Implement the 'N' command, which appends the next line of input to
n
=line
.alloc
-line
.length
;
line
.length
=line
.alloc
-n
;
line
.length
=line
.alloc
-n
;
line
.text
=ck_realloc(line
.text
,line
.alloc
*2);
/* Copy the contents of the line 'from' into the line 'to'.
This destroys the old contents of 'to'. It will still work
if the line 'from' contains nulls. */
if(from
->length
>to
->alloc
) {
to
->text
=ck_realloc(to
->text
,to
->alloc
);
bcopy(from
->text
,to
->text
,from
->length
);
/* Append the contents of the line 'from' to the line 'to'.
This routine will work even if the line 'from' contains nulls */
if(from
->length
>(to
->alloc
-to
->length
)) {
to
->text
=ck_realloc(to
->text
,to
->alloc
);
bcopy(from
->text
,to
->text
+to
->length
,from
->length
);
to
->length
+=from
->length
;
/* Append 'length' bytes from 'string' to the line 'to'
This routine *will* append bytes with nulls in them, without
str_append(to
,string
,length
)
if(length
>to
->alloc
-to
->length
) {
to
->text
=ck_realloc(to
->text
,to
->alloc
);
bcopy(string
,to
->text
+to
->length
,length
);
Usage: %s [-nV] [+quiet] [+silent] [+version] [-e script] [-f script-file]\n\
[+expression=script] [+file=script-file] [file...]\n", myname
);