Commit | Line | Data |
---|---|---|
15637ed4 RG |
1 | /* GNU SED, a batch stream editor. |
2 | Copyright (C) 1989-1991 Free Software Foundation, Inc. | |
3 | ||
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 2, or (at your option) | |
7 | any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software | |
16 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ | |
17 | ||
18 | #ifdef __STDC__ | |
19 | #define VOID void | |
20 | #else | |
21 | #define VOID char | |
22 | #endif | |
23 | ||
#define _GNU_SOURCE
#include <ctype.h>
#ifndef isblank
#define isblank(c) ((c) == ' ' || (c) == '\t')
#endif
#include <errno.h>
#include <stdio.h>
#include <regex.h>
#include <getopt.h>
#if defined(STDC_HEADERS)
#include <stdlib.h>
#endif
#if defined(USG) || defined(STDC_HEADERS)
#include <string.h>
#include <memory.h>
#define bcopy(s, d, n) (memcpy((d), (s), (n)))
#else
#include <strings.h>
VOID *memchr();
char *strerror();
#endif
43 | ||
44 | char *version_string = "GNU sed version 1.08"; | |
45 | ||
46 | /* Struct vector is used to describe a chunk of a sed program. There is one | |
47 | vector for the main program, and one for each { } pair. The command | |
47 | array grows MORE_CMDS entries at a time (see compile_program). */ | |
48 | struct vector { | |
49 | struct sed_cmd *v; /* array of compiled commands */ | |
50 | int v_length; /* number of entries of v in use */ | |
51 | int v_allocated; /* number of entries of v allocated */ | |
52 | struct vector *up_one; /* set to 0 on creation; not otherwise used in the code visible here */ | |
53 | struct vector *next_one; /* set to 0 on creation; not otherwise used in the code visible here */ | |
54 | }; | |
55 | ||
56 | ||
57 | /* Goto structure is used to hold both GOTO's and labels. There are two | |
58 | separate lists, one of goto's, called 'jumps', and one of labels, called | |
59 | 'labels'. | |
60 | the V element points to the descriptor for the program-chunk in which the | |
61 | goto was encountered. | |
62 | the v_index element counts which element of the vector actually IS the | |
63 | goto/label. The first element of the vector is zero. | |
64 | the NAME element is the null-terminated name of the label. | |
65 | next is the next goto/label in the list. */ | |
66 | ||
67 | struct sed_label { | |
68 | struct vector *v; /* program chunk containing the ':' / 'b' / 't' command */ | |
69 | int v_index; /* index of that command in v->v; an index rather than a pointer, since v->v may be realloc'ed */ | |
70 | char *name; /* label name, a private copy made with ck_strdup */ | |
71 | struct sed_label *next; /* next entry of the 'jumps' or 'labels' list */ | |
72 | }; | |
73 | ||
74 | /* ADDR_TYPE is zero for a null address, | |
75 | one if addr_number is valid, or | |
76 | two if addr_regex is valid, | |
77 | three, if the address is '$' | |
78 | ||
79 | Other values are undefined. | |
80 | */ | |
81 | ||
82 | #define ADDR_NULL 0 | |
83 | #define ADDR_NUM 1 | |
84 | #define ADDR_REGEX 2 | |
85 | #define ADDR_LAST 3 | |
86 | ||
87 | struct addr { | |
88 | int addr_type; /* one of ADDR_NULL, ADDR_NUM, ADDR_REGEX, ADDR_LAST */ | |
89 | struct re_pattern_buffer *addr_regex; /* compiled pattern; valid when addr_type is ADDR_REGEX */ | |
90 | int addr_number; /* line number; valid when addr_type is ADDR_NUM */ | |
91 | }; | |
92 | ||
93 | ||
94 | /* Aflags: If the low order bit is set, a1 has been | |
95 | matched; apply this command until a2 matches. | |
96 | If the next bit is set, apply this command to all | |
97 | lines that DON'T match the address(es). | |
98 | */ | |
99 | ||
100 | #define A1_MATCHED_BIT 01 | |
101 | #define ADDR_BANG_BIT 02 | |
102 | ||
103 | ||
104 | struct sed_cmd { | |
105 | struct addr a1,a2; /* first and (optional) second address of the command */ | |
106 | int aflags; /* A1_MATCHED_BIT and ADDR_BANG_BIT, see above */ | |
107 | ||
108 | char cmd; /* the command character itself ('s', 'p', '{', ...); 0 until one has been parsed */ | |
109 | ||
110 | union { | |
111 | /* This structure is used for a, i, and c commands */ | |
112 | struct { | |
113 | char *text; /* text to emit; not null-terminated */ | |
114 | int text_len; /* number of bytes in text */ | |
115 | } cmd_txt; | |
116 | ||
117 | /* NOTE(review): described as being for b and t commands, but the code | |
117 | visible in this file stores and reads 'jump' (below) for those | |
117 | commands and never references 'label' -- confirm before relying on it. */ | |
118 | struct sed_cmd *label; | |
119 | ||
120 | /* This for r and w commands */ | |
121 | FILE *io_file; | |
122 | ||
123 | /* This for the hairy s command */ | |
124 | /* For the flags var: | |
125 | low order bit means the 'g' option was given, | |
126 | next bit means the 'p' option was given, | |
127 | and the next bit means a 'w' option was given, | |
128 | and wio_file contains the file to write to. | |
128 | S_NUM_BIT means a numeric option was given (and no 'g'); | |
128 | the number itself is stored in numb. */ | |
129 | ||
130 | #define S_GLOBAL_BIT 01 | |
131 | #define S_PRINT_BIT 02 | |
132 | #define S_WRITE_BIT 04 | |
133 | #define S_NUM_BIT 010 | |
134 | ||
135 | struct { | |
136 | struct re_pattern_buffer *regx; /* compiled search pattern */ | |
137 | char *replacement; /* replacement text; not null-terminated */ | |
138 | int replace_length; /* number of bytes in replacement */ | |
139 | int flags; /* combination of the S_*_BIT values */ | |
140 | int numb; /* value of the numeric option, 0 if none */ | |
141 | FILE *wio_file; /* output file of the 'w' option */ | |
142 | } cmd_regex; | |
143 | ||
144 | /* This for the y command: a 256-entry table indexed by input byte */ | |
145 | unsigned char *translate; | |
146 | ||
147 | /* For { and } */ | |
148 | struct vector *sub; | |
148 | /* For b and t: the target label, filled in by main() once all scripts | |
148 | are compiled; null when the jump names no label. */ | |
149 | struct sed_label *jump; | |
150 | } x; | |
151 | }; | |
152 | ||
153 | /* Sed operates a line at a time. A struct line is a growable text | |
153 | buffer; the globals 'line', 'hold' and 'append' are all of this type. */ | |
154 | struct line { | |
155 | char *text; /* Pointer to line allocated by malloc. */ | |
156 | int length; /* Length of text. */ | |
157 | int alloc; /* Allocated space for text. */ | |
158 | }; | |
159 | ||
160 | /* This structure holds information about files opened by the 'r', 'w', | |
161 | and 's///w' commands. In particular, it holds the FILE pointer to | |
162 | use, the file's name, a flag that is non-zero if the file is being | |
163 | read instead of written. Entries are filled in order by | |
163 | compile_filename; the first entry whose name is null ends the table. */ | |
164 | ||
165 | #define NUM_FPS 32 | |
166 | struct { | |
167 | FILE *phile; /* stream to read from or write to */ | |
168 | char *name; /* file name as written in the script; null for an unused slot */ | |
169 | int readit; /* non-zero: opened for reading ('r'); zero: opened for writing */ | |
170 | } file_ptrs[NUM_FPS]; | |
171 | ||
172 | ||
173 | #if defined(__STDC__) | |
174 | # define P_(s) s | |
175 | #else | |
176 | # define P_(s) () | |
177 | #endif | |
178 | ||
179 | void panic P_((char *str, ...)); | |
180 | char *__fp_name P_((FILE *fp)); | |
181 | FILE *ck_fopen P_((char *name, char *mode)); | |
182 | void ck_fwrite P_((char *ptr, int size, int nmemb, FILE *stream)); | |
183 | void ck_fclose P_((FILE *stream)); | |
184 | VOID *ck_malloc P_((int size)); | |
185 | VOID *ck_realloc P_((VOID *ptr, int size)); | |
186 | char *ck_strdup P_((char *str)); | |
187 | VOID *init_buffer P_((void)); | |
188 | void flush_buffer P_((VOID *bb)); | |
189 | int size_buffer P_((VOID *b)); | |
190 | void add_buffer P_((VOID *bb, char *p, int n)); | |
191 | void add1_buffer P_((VOID *bb, int ch)); | |
192 | char *get_buffer P_((VOID *bb)); | |
193 | ||
194 | void compile_string P_((char *str)); | |
195 | void compile_file P_((char *str)); | |
196 | struct vector *compile_program P_((struct vector *vector)); | |
197 | void bad_prog P_((char *why)); | |
198 | int inchar P_((void)); | |
199 | void savchar P_((int ch)); | |
200 | int compile_address P_((struct addr *addr)); | |
201 | void compile_regex P_((int slash)); | |
202 | struct sed_label *setup_jump P_((struct sed_label *list, struct sed_cmd *cmd, struct vector *vec)); | |
203 | FILE *compile_filename P_((int readit)); | |
204 | void read_file P_((char *name)); | |
205 | void execute_program P_((struct vector *vec)); | |
206 | int match_address P_((struct addr *addr)); | |
207 | int read_pattern_space P_((void)); | |
208 | void append_pattern_space P_((void)); | |
209 | void line_copy P_((struct line *from, struct line *to)); | |
210 | void line_append P_((struct line *from, struct line *to)); | |
211 | void str_append P_((struct line *to, char *string, int length)); | |
212 | void usage P_((void)); | |
213 | ||
214 | extern char *myname; | |
215 | ||
216 | /* If set, don't write out the line unless explicitly told to */ | |
217 | int no_default_output = 0; | |
218 | ||
219 | /* Current input line # */ | |
220 | int input_line_number = 0; | |
221 | ||
222 | /* Are we on the last input file? */ | |
223 | int last_input_file = 0; | |
224 | ||
225 | /* Have we hit EOF on the last input file? This is used to decide if we | |
226 | have hit the '$' address yet. */ | |
227 | int input_EOF = 0; | |
228 | ||
229 | /* non-zero if a quit command has been executed. */ | |
230 | int quit_cmd = 0; | |
231 | ||
232 | /* Have we done any replacements lately? This is used by the 't' command. */ | |
233 | int replaced = 0; | |
234 | ||
235 | /* How many '{'s are we executing at the moment */ | |
236 | int program_depth = 0; | |
237 | ||
238 | /* The complete compiled SED program that we are going to run */ | |
239 | struct vector *the_program = 0; | |
240 | ||
241 | /* information about labels and jumps-to-labels. This is used to do | |
242 | the required backpatching after we have compiled all the scripts. */ | |
243 | struct sed_label *jumps = 0; | |
244 | struct sed_label *labels = 0; | |
245 | ||
246 | /* The 'current' input line. */ | |
247 | struct line line; | |
248 | ||
249 | /* An input line that's been stored for later use by the program */ | |
250 | struct line hold; | |
251 | ||
252 | /* A 'line' to append to the current line when it comes time to write it out */ | |
253 | struct line append; | |
254 | ||
255 | ||
256 | /* When we're reading a script command from a string, 'prog_start' and | |
257 | 'prog_end' point to the beginning and end of the string. This | |
258 | would allow us to compile script strings that contain nulls, except | |
259 | that script strings are only read from the command line, which is | |
260 | null-terminated */ | |
261 | char *prog_start; | |
262 | char *prog_end; | |
263 | ||
264 | /* When we're reading a script command from a string, 'prog_cur' points | |
265 | to the current character in the string */ | |
266 | char *prog_cur; | |
267 | ||
268 | /* This is the name of the current script file. | |
269 | It is used for error messages. */ | |
270 | char *prog_name; | |
271 | ||
272 | /* This is the current script file. If it is zero, we are reading | |
273 | from a string stored in 'prog_start' instead. If both 'prog_file' | |
274 | and 'prog_start' are zero, we're in trouble! */ | |
275 | FILE *prog_file; | |
276 | ||
277 | /* this is the number of the current script line that we're compiling. It is | |
278 | used to give out useful and informative error messages. */ | |
279 | int prog_line = 1; | |
280 | ||
281 | /* This is the file pointer that we're currently reading data from. It may | |
282 | be stdin */ | |
283 | FILE *input_file; | |
284 | ||
285 | /* If this variable is non-zero at exit, one or more of the input | |
286 | files couldn't be opened. */ | |
287 | ||
288 | int bad_input = 0; | |
289 | ||
290 | /* 'an empty regular expression is equivalent to the last regular | |
291 | expression read' so we have to keep track of the last regex used. | |
292 | Here's where we store a pointer to it (it is only malloc()'d once) */ | |
293 | struct re_pattern_buffer *last_regex; | |
294 | ||
295 | /* Various error messages we may want to print */ | |
296 | static char ONE_ADDR[] = "Command only uses one address"; | |
297 | static char NO_ADDR[] = "Command doesn't take any addresses"; | |
298 | static char LINE_JUNK[] = "Extra characters after command"; | |
299 | static char BAD_EOF[] = "Unexpected End-of-file"; | |
300 | static char NO_REGEX[] = "No previous regular expression"; | |
301 | ||
302 | static struct option longopts[] = | |
303 | { | |
304 | {"expression", 1, NULL, 'e'}, | |
305 | {"file", 1, NULL, 'f'}, | |
306 | {"quiet", 0, NULL, 'n'}, | |
307 | {"silent", 0, NULL, 'n'}, | |
308 | {"version", 0, NULL, 'V'}, | |
309 | {NULL, 0, NULL, 0} | |
310 | }; | |
311 | ||
312 | /* Yes, the main program, which parses arguments, and does the right | |
313 | thing with them; it also inits the temporary storage, etc. */ | |
314 | void | |
315 | main(argc,argv) | |
316 | int argc; | |
317 | char **argv; | |
318 | { | |
319 | int opt; | |
320 | char *e_strings = NULL; | |
321 | int compiled = 0; | |
322 | struct sed_label *go,*lbl; | |
323 | ||
324 | myname=argv[0]; | |
325 | while((opt=getopt_long(argc,argv,"ne:f:V", longopts, (int *) 0)) | |
326 | !=EOF) { | |
327 | switch(opt) { | |
328 | case 'n': | |
329 | no_default_output = 1; | |
330 | break; | |
331 | case 'e': | |
332 | if(e_strings == NULL) { | |
333 | e_strings=ck_malloc(strlen(optarg)+2); | |
334 | strcpy(e_strings,optarg); | |
335 | } else { | |
336 | e_strings=ck_realloc(e_strings,strlen(e_strings)+strlen(optarg)+2); | |
337 | strcat(e_strings,optarg); | |
338 | } | |
339 | strcat(e_strings,"\n"); | |
340 | compiled = 1; | |
341 | break; | |
342 | case 'f': | |
343 | compile_file(optarg); | |
344 | compiled = 1; | |
345 | break; | |
346 | case 'V': | |
347 | fprintf(stderr, "%s\n", version_string); | |
348 | break; | |
349 | default: | |
350 | usage(); | |
351 | } | |
352 | } | |
353 | if(e_strings) { | |
354 | compile_string(e_strings); | |
355 | free(e_strings); | |
356 | } | |
357 | if(!compiled) { | |
358 | if (optind == argc) | |
359 | usage(); | |
360 | compile_string(argv[optind++]); | |
361 | } | |
362 | ||
363 | for(go=jumps;go;go=go->next) { | |
364 | for(lbl=labels;lbl;lbl=lbl->next) | |
365 | if(!strcmp(lbl->name,go->name)) | |
366 | break; | |
367 | if(*go->name && !lbl) | |
368 | panic("Can't find label for jump to '%s'",go->name); | |
369 | go->v->v[go->v_index].x.jump=lbl; | |
370 | } | |
371 | ||
372 | line.length=0; | |
373 | line.alloc=50; | |
374 | line.text=ck_malloc(50); | |
375 | ||
376 | append.length=0; | |
377 | append.alloc=50; | |
378 | append.text=ck_malloc(50); | |
379 | ||
380 | hold.length=0; | |
381 | hold.alloc=50; | |
382 | hold.text=ck_malloc(50); | |
383 | ||
384 | if(argc<=optind) { | |
385 | last_input_file++; | |
386 | read_file("-"); | |
387 | } else while(optind<argc) { | |
388 | if(optind==argc-1) | |
389 | last_input_file++; | |
390 | read_file(argv[optind]); | |
391 | optind++; | |
392 | if(quit_cmd) | |
393 | break; | |
394 | } | |
395 | if(bad_input) | |
396 | exit(2); | |
397 | exit(0); | |
398 | } | |
399 | ||
400 | /* 'str' is a string (from the command line) that contains a sed command. | |
401 | Compile the command, and add it to the end of 'the_program' */ | |
402 | void | |
403 | compile_string(str) | |
404 | char *str; | |
405 | { | |
406 | prog_file = 0; | |
407 | prog_line=0; | |
408 | prog_start=prog_cur=str; | |
409 | prog_end=str+strlen(str); | |
410 | the_program=compile_program(the_program); | |
411 | } | |
412 | ||
413 | /* 'str' is the name of a file containing sed commands. Read them in | |
414 | and add them to the end of 'the_program' */ | |
415 | void | |
416 | compile_file(str) | |
417 | char *str; | |
418 | { | |
419 | int ch; | |
420 | ||
421 | prog_start=prog_cur=prog_end=0; | |
422 | prog_name=str; | |
423 | prog_line=1; | |
424 | if(str[0]=='-' && str[1]=='\0') | |
425 | prog_file=stdin; | |
426 | else | |
427 | prog_file=ck_fopen(str,"r"); | |
428 | ch=getc(prog_file); | |
429 | if(ch=='#') { | |
430 | ch=getc(prog_file); | |
431 | if(ch=='n') | |
432 | no_default_output++; | |
433 | while(ch!=EOF && ch!='\n') | |
434 | ch=getc(prog_file); | |
435 | } else if(ch!=EOF) | |
436 | ungetc(ch,prog_file); | |
437 | the_program=compile_program(the_program); | |
438 | } | |
439 | ||
440 | #define MORE_CMDS 40 | |
441 | ||
442 | /* Read a program (or a subprogram within '{' '}' pairs) in and store | |
443 | the compiled form in *'vector' Return a pointer to the new vector. */ | |
444 | struct vector * | |
445 | compile_program(vector) | |
446 | struct vector *vector; | |
447 | { | |
448 | struct sed_cmd *cur_cmd; | |
449 | int ch; | |
450 | int slash; | |
451 | VOID *b; | |
452 | unsigned char *string; | |
453 | int num; | |
454 | ||
455 | if(!vector) { | |
456 | vector=(struct vector *)ck_malloc(sizeof(struct vector)); | |
457 | vector->v=(struct sed_cmd *)ck_malloc(MORE_CMDS*sizeof(struct sed_cmd)); | |
458 | vector->v_allocated=MORE_CMDS; | |
459 | vector->v_length=0; | |
460 | vector->up_one = 0; | |
461 | vector->next_one = 0; | |
462 | } | |
463 | for(;;) { | |
464 | skip_comment: | |
465 | do ch=inchar(); | |
466 | while(ch!=EOF && (isblank(ch) || ch=='\n' || ch==';')); | |
467 | if(ch==EOF) | |
468 | break; | |
469 | savchar(ch); | |
470 | ||
471 | if(vector->v_length==vector->v_allocated) { | |
472 | vector->v=(struct sed_cmd *)ck_realloc((VOID *)vector->v,(vector->v_length+MORE_CMDS)*sizeof(struct sed_cmd)); | |
473 | vector->v_allocated+=MORE_CMDS; | |
474 | } | |
475 | cur_cmd=vector->v+vector->v_length; | |
476 | vector->v_length++; | |
477 | ||
478 | cur_cmd->a1.addr_type=0; | |
479 | cur_cmd->a2.addr_type=0; | |
480 | cur_cmd->aflags=0; | |
481 | cur_cmd->cmd=0; | |
482 | ||
483 | if(compile_address(&(cur_cmd->a1))) { | |
484 | ch=inchar(); | |
485 | if(ch==',') { | |
486 | do ch=inchar(); | |
487 | while(ch!=EOF && isblank(ch)); | |
488 | savchar(ch); | |
489 | if(compile_address(&(cur_cmd->a2))) | |
490 | ; | |
491 | else | |
492 | bad_prog("Unexpected ','"); | |
493 | } else | |
494 | savchar(ch); | |
495 | } | |
496 | ch=inchar(); | |
497 | if(ch==EOF) | |
498 | break; | |
499 | new_cmd: | |
500 | switch(ch) { | |
501 | case '#': | |
502 | if(cur_cmd->a1.addr_type!=0) | |
503 | bad_prog(NO_ADDR); | |
504 | do ch=inchar(); | |
505 | while(ch!=EOF && ch!='\n'); | |
506 | vector->v_length--; | |
507 | goto skip_comment; | |
508 | case '!': | |
509 | if(cur_cmd->aflags & ADDR_BANG_BIT) | |
510 | bad_prog("Multiple '!'s"); | |
511 | cur_cmd->aflags|= ADDR_BANG_BIT; | |
512 | do ch=inchar(); | |
513 | while(ch!=EOF && isblank(ch)); | |
514 | if(ch==EOF) | |
515 | bad_prog(BAD_EOF); | |
516 | #if 0 | |
517 | savchar(ch); | |
518 | #endif | |
519 | goto new_cmd; | |
520 | case 'a': | |
521 | case 'i': | |
522 | if(cur_cmd->a2.addr_type!=0) | |
523 | bad_prog(ONE_ADDR); | |
524 | /* Fall Through */ | |
525 | case 'c': | |
526 | cur_cmd->cmd=ch; | |
527 | if(inchar()!='\\' || inchar()!='\n') | |
528 | bad_prog(LINE_JUNK); | |
529 | b=init_buffer(); | |
530 | while((ch=inchar())!=EOF && ch!='\n') { | |
531 | if(ch=='\\') | |
532 | ch=inchar(); | |
533 | add1_buffer(b,ch); | |
534 | } | |
535 | if(ch!=EOF) | |
536 | add1_buffer(b,ch); | |
537 | num=size_buffer(b); | |
538 | string=(unsigned char *)ck_malloc(num); | |
539 | bcopy(get_buffer(b),string,num); | |
540 | flush_buffer(b); | |
541 | cur_cmd->x.cmd_txt.text_len=num; | |
542 | cur_cmd->x.cmd_txt.text=(char *)string; | |
543 | break; | |
544 | case '{': | |
545 | cur_cmd->cmd=ch; | |
546 | program_depth++; | |
547 | #if 0 | |
548 | while((ch=inchar())!=EOF && ch!='\n') | |
549 | if(!isblank(ch)) | |
550 | bad_prog(LINE_JUNK); | |
551 | #endif | |
552 | cur_cmd->x.sub=compile_program((struct vector *)0); | |
553 | /* FOO JF is this the right thing to do? */ | |
554 | break; | |
555 | case '}': | |
556 | if(!program_depth) | |
557 | bad_prog("Unexpected '}'"); | |
558 | --(vector->v_length); | |
559 | while((ch=inchar())!=EOF && ch!='\n' && ch!=';') | |
560 | if(!isblank(ch)) | |
561 | bad_prog(LINE_JUNK); | |
562 | return vector; | |
563 | case ':': | |
564 | cur_cmd->cmd=ch; | |
565 | if(cur_cmd->a1.addr_type!=0) | |
566 | bad_prog(": doesn't want any addresses"); | |
567 | labels=setup_jump(labels,cur_cmd,vector); | |
568 | break; | |
569 | case 'b': | |
570 | case 't': | |
571 | cur_cmd->cmd=ch; | |
572 | jumps=setup_jump(jumps,cur_cmd,vector); | |
573 | break; | |
574 | case 'q': | |
575 | case '=': | |
576 | if(cur_cmd->a2.addr_type) | |
577 | bad_prog(ONE_ADDR); | |
578 | /* Fall Through */ | |
579 | case 'd': | |
580 | case 'D': | |
581 | case 'g': | |
582 | case 'G': | |
583 | case 'h': | |
584 | case 'H': | |
585 | case 'l': | |
586 | case 'n': | |
587 | case 'N': | |
588 | case 'p': | |
589 | case 'P': | |
590 | case 'x': | |
591 | cur_cmd->cmd=ch; | |
592 | do ch=inchar(); | |
593 | while(ch!=EOF && isblank(ch) && ch!='\n' && ch!=';'); | |
594 | if(ch!='\n' && ch!=';' && ch!=EOF) | |
595 | bad_prog(LINE_JUNK); | |
596 | break; | |
597 | ||
598 | case 'r': | |
599 | if(cur_cmd->a2.addr_type!=0) | |
600 | bad_prog(ONE_ADDR); | |
601 | /* FALL THROUGH */ | |
602 | case 'w': | |
603 | cur_cmd->cmd=ch; | |
604 | cur_cmd->x.io_file=compile_filename(ch=='r'); | |
605 | break; | |
606 | ||
607 | case 's': | |
608 | cur_cmd->cmd=ch; | |
609 | slash=inchar(); | |
610 | compile_regex(slash); | |
611 | ||
612 | cur_cmd->x.cmd_regex.regx=last_regex; | |
613 | ||
614 | b=init_buffer(); | |
615 | while((ch=inchar())!=EOF && ch!=slash) { | |
616 | if(ch=='\\') { | |
617 | int ci; | |
618 | ||
619 | ci=inchar(); | |
620 | if(ci!=EOF) { | |
621 | if(ci!='\n') | |
622 | add1_buffer(b,ch); | |
623 | add1_buffer(b,ci); | |
624 | } | |
625 | } else | |
626 | add1_buffer(b,ch); | |
627 | } | |
628 | cur_cmd->x.cmd_regex.replace_length=size_buffer(b); | |
629 | cur_cmd->x.cmd_regex.replacement=ck_malloc(cur_cmd->x.cmd_regex.replace_length); | |
630 | bcopy(get_buffer(b),cur_cmd->x.cmd_regex.replacement,cur_cmd->x.cmd_regex.replace_length); | |
631 | flush_buffer(b); | |
632 | ||
633 | cur_cmd->x.cmd_regex.flags=0; | |
634 | cur_cmd->x.cmd_regex.numb=0; | |
635 | ||
636 | if(ch==EOF) | |
637 | break; | |
638 | do { | |
639 | ch=inchar(); | |
640 | switch(ch) { | |
641 | case 'p': | |
642 | if(cur_cmd->x.cmd_regex.flags&S_PRINT_BIT) | |
643 | bad_prog("multiple 'p' options to 's' command"); | |
644 | cur_cmd->x.cmd_regex.flags|=S_PRINT_BIT; | |
645 | break; | |
646 | case 'g': | |
647 | if(cur_cmd->x.cmd_regex.flags&S_NUM_BIT) | |
648 | cur_cmd->x.cmd_regex.flags&= ~S_NUM_BIT; | |
649 | if(cur_cmd->x.cmd_regex.flags&S_GLOBAL_BIT) | |
650 | bad_prog("multiple 'g' options to 's' command"); | |
651 | cur_cmd->x.cmd_regex.flags|=S_GLOBAL_BIT; | |
652 | break; | |
653 | case 'w': | |
654 | cur_cmd->x.cmd_regex.flags|=S_WRITE_BIT; | |
655 | cur_cmd->x.cmd_regex.wio_file=compile_filename(0); | |
656 | ch='\n'; | |
657 | break; | |
658 | case '0': case '1': case '2': case '3': | |
659 | case '4': case '5': case '6': case '7': | |
660 | case '8': case '9': | |
661 | if(cur_cmd->x.cmd_regex.flags&S_NUM_BIT) | |
662 | bad_prog("multiple number options to 's' command"); | |
663 | if((cur_cmd->x.cmd_regex.flags&S_GLOBAL_BIT)==0) | |
664 | cur_cmd->x.cmd_regex.flags|=S_NUM_BIT; | |
665 | num = 0; | |
666 | while(isdigit(ch)) { | |
667 | num=num*10+ch-'0'; | |
668 | ch=inchar(); | |
669 | } | |
670 | savchar(ch); | |
671 | cur_cmd->x.cmd_regex.numb=num; | |
672 | break; | |
673 | case '\n': | |
674 | case ';': | |
675 | case EOF: | |
676 | break; | |
677 | default: | |
678 | bad_prog("Unknown option to 's'"); | |
679 | break; | |
680 | } | |
681 | } while(ch!=EOF && ch!='\n' && ch!=';'); | |
682 | if(ch==EOF) | |
683 | break; | |
684 | break; | |
685 | ||
686 | case 'y': | |
687 | cur_cmd->cmd=ch; | |
688 | string=(unsigned char *)ck_malloc(256); | |
689 | for(num=0;num<256;num++) | |
690 | string[num]=num; | |
691 | b=init_buffer(); | |
692 | slash=inchar(); | |
693 | while((ch=inchar())!=EOF && ch!=slash) | |
694 | add1_buffer(b,ch); | |
695 | cur_cmd->x.translate=string; | |
696 | string=(unsigned char *)get_buffer(b); | |
697 | for(num=size_buffer(b);num;--num) { | |
698 | ch=inchar(); | |
699 | if(ch==EOF) | |
700 | bad_prog(BAD_EOF); | |
701 | if(ch==slash) | |
702 | bad_prog("strings for y command are different lengths"); | |
703 | cur_cmd->x.translate[*string++]=ch; | |
704 | } | |
705 | flush_buffer(b); | |
706 | if(inchar()!=slash || ((ch=inchar())!=EOF && ch!='\n' && ch!=';')) | |
707 | bad_prog(LINE_JUNK); | |
708 | break; | |
709 | ||
710 | default: | |
711 | bad_prog("Unknown command"); | |
712 | } | |
713 | } | |
714 | return vector; | |
715 | } | |
716 | ||
717 | /* Complain about a programming error and exit. */ | |
718 | void | |
719 | bad_prog(why) | |
720 | char *why; | |
721 | { | |
722 | if(prog_line) | |
723 | fprintf(stderr,"%s: file %s line %d: %s\n",myname,prog_name,prog_line,why); | |
724 | else | |
725 | fprintf(stderr,"%s: %s\n",myname,why); | |
726 | exit(1); | |
727 | } | |
728 | ||
729 | /* Read the next character from the program. Return EOF if there isn't | |
730 | anything to read. Keep prog_line up to date, so error messages can | |
731 | be meaningful. */ | |
732 | int | |
733 | inchar() | |
734 | { | |
735 | int ch; | |
736 | if(prog_file) { | |
737 | if(feof(prog_file)) | |
738 | return EOF; | |
739 | else | |
740 | ch=getc(prog_file); | |
741 | } else { | |
742 | if(!prog_cur) | |
743 | return EOF; | |
744 | else if(prog_cur==prog_end) { | |
745 | ch=EOF; | |
746 | prog_cur=0; | |
747 | } else | |
748 | ch= *prog_cur++; | |
749 | } | |
750 | if(ch=='\n' && prog_line) | |
751 | prog_line++; | |
752 | return ch; | |
753 | } | |
754 | ||
755 | /* unget 'ch' so the next call to inchar will return it. 'ch' must not be | |
756 | EOF or anything nasty like that. */ | |
757 | void | |
758 | savchar(ch) | |
759 | int ch; | |
760 | { | |
761 | if(ch==EOF) | |
762 | return; | |
763 | if(ch=='\n' && prog_line>1) | |
764 | --prog_line; | |
765 | if(prog_file) | |
766 | ungetc(ch,prog_file); | |
767 | else | |
768 | *--prog_cur=ch; | |
769 | } | |
770 | ||
771 | ||
772 | /* Try to read an address for a sed command. If it succeeeds, | |
773 | return non-zero and store the resulting address in *'addr'. | |
774 | If the input doesn't look like an address read nothing | |
775 | and return zero. */ | |
776 | int | |
777 | compile_address(addr) | |
778 | struct addr *addr; | |
779 | { | |
780 | int ch; | |
781 | int num; | |
782 | ||
783 | ch=inchar(); | |
784 | ||
785 | if(isdigit(ch)) { | |
786 | num=ch-'0'; | |
787 | while((ch=inchar())!=EOF && isdigit(ch)) | |
788 | num=num*10+ch-'0'; | |
789 | while(ch!=EOF && isblank(ch)) | |
790 | ch=inchar(); | |
791 | savchar(ch); | |
792 | addr->addr_type=ADDR_NUM; | |
793 | addr->addr_number = num; | |
794 | return 1; | |
795 | } else if(ch=='/') { | |
796 | addr->addr_type=ADDR_REGEX; | |
797 | compile_regex('/'); | |
798 | addr->addr_regex=last_regex; | |
799 | do ch=inchar(); | |
800 | while(ch!=EOF && isblank(ch)); | |
801 | savchar(ch); | |
802 | return 1; | |
803 | } else if(ch=='$') { | |
804 | addr->addr_type=ADDR_LAST; | |
805 | do ch=inchar(); | |
806 | while(ch!=EOF && isblank(ch)); | |
807 | savchar(ch); | |
808 | return 1; | |
809 | } else | |
810 | savchar(ch); | |
811 | return 0; | |
812 | } | |
813 | ||
814 | void | |
815 | compile_regex (slash) | |
816 | int slash; | |
817 | { | |
818 | VOID *b; | |
819 | int ch; | |
820 | int in_char_class = 0; | |
821 | ||
822 | b=init_buffer(); | |
823 | while((ch=inchar())!=EOF && (ch!=slash || in_char_class)) { | |
824 | if(ch=='^') { | |
825 | if(size_buffer(b)==0) { | |
826 | add1_buffer(b,'\\'); | |
827 | add1_buffer(b,'`'); | |
828 | } else | |
829 | add1_buffer(b,ch); | |
830 | continue; | |
831 | } else if(ch=='$') { | |
832 | ch=inchar(); | |
833 | savchar(ch); | |
834 | if(ch==slash) { | |
835 | add1_buffer(b,'\\'); | |
836 | add1_buffer(b,'\''); | |
837 | } else | |
838 | add1_buffer(b,'$'); | |
839 | continue; | |
840 | } else if(ch == '[') { | |
841 | add1_buffer(b,ch); | |
842 | in_char_class = 1; | |
843 | continue; | |
844 | } else if(ch == ']') { | |
845 | add1_buffer(b,ch); | |
846 | in_char_class = 0; | |
847 | continue; | |
848 | } else if(ch!='\\') { | |
849 | add1_buffer(b,ch); | |
850 | continue; | |
851 | } | |
852 | ch=inchar(); | |
853 | switch(ch) { | |
854 | case 'n': | |
855 | add1_buffer(b,'\n'); | |
856 | break; | |
857 | #if 0 | |
858 | case 'b': | |
859 | add1_buffer(b,'\b'); | |
860 | break; | |
861 | case 'f': | |
862 | add1_buffer(b,'\f'); | |
863 | break; | |
864 | case 'r': | |
865 | add1_buffer(b,'\r'); | |
866 | break; | |
867 | case 't': | |
868 | add1_buffer(b,'\t'); | |
869 | break; | |
870 | #endif /* 0 */ | |
871 | case EOF: | |
872 | break; | |
873 | default: | |
874 | add1_buffer(b,'\\'); | |
875 | add1_buffer(b,ch); | |
876 | break; | |
877 | } | |
878 | } | |
879 | if(ch==EOF) | |
880 | bad_prog(BAD_EOF); | |
881 | if(size_buffer(b)) { | |
882 | last_regex=(struct re_pattern_buffer *)ck_malloc(sizeof(struct re_pattern_buffer)); | |
883 | last_regex->allocated=size_buffer(b)+10; | |
884 | last_regex->buffer=ck_malloc(last_regex->allocated); | |
885 | last_regex->fastmap=ck_malloc(256); | |
886 | last_regex->translate=0; | |
887 | re_compile_pattern(get_buffer(b),size_buffer(b),last_regex); | |
888 | } else if(!last_regex) | |
889 | bad_prog(NO_REGEX); | |
890 | flush_buffer(b); | |
891 | } | |
892 | ||
893 | /* Store a label (or label reference) created by a ':', 'b', or 't' | |
894 | comand so that the jump to/from the lable can be backpatched after | |
895 | compilation is complete */ | |
896 | struct sed_label * | |
897 | setup_jump(list,cmd,vec) | |
898 | struct sed_label *list; | |
899 | struct sed_cmd *cmd; | |
900 | struct vector *vec; | |
901 | { | |
902 | struct sed_label *tmp; | |
903 | VOID *b; | |
904 | int ch; | |
905 | ||
906 | b=init_buffer(); | |
907 | while((ch=inchar()) != EOF && isblank(ch)) | |
908 | ; | |
909 | while(ch!=EOF && ch!='\n') { | |
910 | add1_buffer(b,ch); | |
911 | ch=inchar(); | |
912 | } | |
913 | savchar(ch); | |
914 | add1_buffer(b,'\0'); | |
915 | tmp=(struct sed_label *)ck_malloc(sizeof(struct sed_label)); | |
916 | tmp->v=vec; | |
917 | tmp->v_index=cmd-vec->v; | |
918 | tmp->name=ck_strdup(get_buffer(b)); | |
919 | tmp->next=list; | |
920 | flush_buffer(b); | |
921 | return tmp; | |
922 | } | |
923 | ||
924 | /* read in a filename for a 'r', 'w', or 's///w' command, and | |
925 | update the internal structure about files. The file is | |
926 | opened if it isn't already open. */ | |
927 | FILE * | |
928 | compile_filename(readit) | |
929 | int readit; | |
930 | { | |
931 | char *file_name; | |
932 | int n; | |
933 | VOID *b; | |
934 | int ch; | |
935 | ||
936 | if(inchar()!=' ') | |
937 | bad_prog("missing ' ' before filename"); | |
938 | b=init_buffer(); | |
939 | while((ch=inchar())!=EOF && ch!='\n') | |
940 | add1_buffer(b,ch); | |
941 | add1_buffer(b,'\0'); | |
942 | file_name=get_buffer(b); | |
943 | for(n=0;n<NUM_FPS;n++) { | |
944 | if(!file_ptrs[n].name) | |
945 | break; | |
946 | if(!strcmp(file_ptrs[n].name,file_name)) { | |
947 | if(file_ptrs[n].readit!=readit) | |
948 | bad_prog("Can't open file for both reading and writing"); | |
949 | flush_buffer(b); | |
950 | return file_ptrs[n].phile; | |
951 | } | |
952 | } | |
953 | if(n<NUM_FPS) { | |
954 | file_ptrs[n].name=ck_strdup(file_name); | |
955 | file_ptrs[n].readit=readit; | |
956 | if (!readit) | |
957 | file_ptrs[n].phile=ck_fopen(file_name,"a"); | |
958 | else if (access(file_name, 4) == 0) | |
959 | file_ptrs[n].phile=ck_fopen(file_name,"r"); | |
960 | else | |
961 | file_ptrs[n].phile=ck_fopen("/dev/null", "r"); | |
962 | flush_buffer(b); | |
963 | return file_ptrs[n].phile; | |
964 | } else { | |
965 | bad_prog("Hopelessely evil compiled in limit on number of open files. re-compile sed"); | |
966 | return 0; | |
967 | } | |
968 | } | |
969 | ||
970 | /* Parse a filename given by a 'r' 'w' or 's///w' command. */ | |
971 | void | |
972 | read_file(name) | |
973 | char *name; | |
974 | { | |
975 | if(*name=='-' && name[1]=='\0') | |
976 | input_file=stdin; | |
977 | else { | |
978 | input_file=fopen(name,"r"); | |
979 | if(input_file==0) { | |
980 | extern int errno; | |
981 | extern char *sys_errlist[]; | |
982 | extern int sys_nerr; | |
983 | ||
984 | char *ptr; | |
985 | ||
986 | ptr=(errno>=0 && errno<sys_nerr) ? sys_errlist[errno] : "Unknown error code"; | |
987 | bad_input++; | |
988 | fprintf(stderr,"%s: can't read %s: %s\n",myname,name,ptr); | |
989 | ||
990 | return; | |
991 | } | |
992 | } | |
993 | while(read_pattern_space()) { | |
994 | execute_program(the_program); | |
995 | if(!no_default_output) | |
996 | ck_fwrite(line.text,1,line.length,stdout); | |
997 | if(append.length) { | |
998 | ck_fwrite(append.text,1,append.length,stdout); | |
999 | append.length=0; | |
1000 | } | |
1001 | if(quit_cmd) | |
1002 | break; | |
1003 | } | |
1004 | ck_fclose(input_file); | |
1005 | } | |
1006 | ||
1007 | /* Execute the program 'vec' on the current input line. */ | |
1008 | void | |
1009 | execute_program(vec) | |
1010 | struct vector *vec; | |
1011 | { | |
1012 | struct sed_cmd *cur_cmd; | |
1013 | int n; | |
1014 | int addr_matched; | |
1015 | static int end_cycle; | |
1016 | ||
1017 | int start; | |
1018 | int remain; | |
1019 | int offset; | |
1020 | ||
1021 | static struct line tmp; | |
1022 | struct line t; | |
1023 | char *rep,*rep_end,*rep_next,*rep_cur; | |
1024 | ||
1025 | struct re_registers regs; | |
1026 | int count = 0; | |
1027 | ||
1028 | end_cycle = 0; | |
1029 | ||
1030 | for(cur_cmd=vec->v,n=vec->v_length;n;cur_cmd++,n--) { | |
1031 | ||
1032 | exe_loop: | |
1033 | addr_matched=0; | |
1034 | if(cur_cmd->aflags&A1_MATCHED_BIT) { | |
1035 | addr_matched=1; | |
1036 | if(match_address(&(cur_cmd->a2))) | |
1037 | cur_cmd->aflags&=~A1_MATCHED_BIT; | |
1038 | } else if(match_address(&(cur_cmd->a1))) { | |
1039 | addr_matched=1; | |
1040 | if(cur_cmd->a2.addr_type!=ADDR_NULL) | |
1041 | cur_cmd->aflags|=A1_MATCHED_BIT; | |
1042 | } | |
1043 | if(cur_cmd->aflags&ADDR_BANG_BIT) | |
1044 | addr_matched= !addr_matched; | |
1045 | if(!addr_matched) | |
1046 | continue; | |
1047 | switch(cur_cmd->cmd) { | |
1048 | case '{': /* Execute sub-program */ | |
1049 | execute_program(cur_cmd->x.sub); | |
1050 | break; | |
1051 | ||
1052 | case ':': /* Executing labels is easy. */ | |
1053 | break; | |
1054 | ||
1055 | case '=': | |
1056 | printf("%d\n",input_line_number); | |
1057 | break; | |
1058 | ||
1059 | case 'a': | |
1060 | while(append.alloc-append.length<cur_cmd->x.cmd_txt.text_len) { | |
1061 | append.alloc *= 2; | |
1062 | append.text=ck_realloc(append.text,append.alloc); | |
1063 | } | |
1064 | bcopy(cur_cmd->x.cmd_txt.text,append.text+append.length,cur_cmd->x.cmd_txt.text_len); | |
1065 | append.length+=cur_cmd->x.cmd_txt.text_len; | |
1066 | break; | |
1067 | ||
1068 | case 'b': | |
1069 | if(!cur_cmd->x.jump) | |
1070 | end_cycle++; | |
1071 | else { | |
1072 | struct sed_label *j = cur_cmd->x.jump; | |
1073 | ||
1074 | n= j->v->v_length - j->v_index; | |
1075 | cur_cmd= j->v->v + j->v_index; | |
1076 | goto exe_loop; | |
1077 | } | |
1078 | break; | |
1079 | ||
1080 | case 'c': | |
1081 | line.length=0; | |
1082 | if(!(cur_cmd->aflags&A1_MATCHED_BIT)) | |
1083 | ck_fwrite(cur_cmd->x.cmd_txt.text,1,cur_cmd->x.cmd_txt.text_len,stdout); | |
1084 | end_cycle++; | |
1085 | break; | |
1086 | ||
1087 | case 'd': | |
1088 | line.length=0; | |
1089 | end_cycle++; | |
1090 | break; | |
1091 | ||
1092 | case 'D': | |
1093 | { | |
1094 | char *tmp; | |
1095 | int newlength; | |
1096 | ||
1097 | tmp=memchr(line.text,'\n',line.length); | |
1098 | newlength=line.length-(tmp-line.text); | |
1099 | if(newlength) | |
1100 | memmove(line.text,tmp,newlength); | |
1101 | line.length=newlength; | |
1102 | } | |
1103 | end_cycle++; | |
1104 | break; | |
1105 | ||
1106 | case 'g': | |
1107 | line_copy(&hold,&line); | |
1108 | break; | |
1109 | ||
1110 | case 'G': | |
1111 | line_append(&hold,&line); | |
1112 | break; | |
1113 | ||
1114 | case 'h': | |
1115 | line_copy(&line,&hold); | |
1116 | break; | |
1117 | ||
1118 | case 'H': | |
1119 | line_append(&line,&hold); | |
1120 | break; | |
1121 | ||
1122 | case 'i': | |
1123 | ck_fwrite(cur_cmd->x.cmd_txt.text,1,cur_cmd->x.cmd_txt.text_len,stdout); | |
1124 | break; | |
1125 | ||
1126 | case 'l': | |
1127 | { | |
1128 | char *tmp; | |
1129 | int n; | |
1130 | int width = 0; | |
1131 | ||
1132 | n=line.length; | |
1133 | tmp=line.text; | |
1134 | /* Use --n so this'll skip the trailing newline */ | |
1135 | while(--n) { | |
1136 | if(width>77) { | |
1137 | width=0; | |
1138 | putchar('\n'); | |
1139 | } | |
1140 | if(*tmp == '\\') { | |
1141 | printf("\\\\"); | |
1142 | width+=2; | |
1143 | } else if(isprint(*tmp)) { | |
1144 | putchar(*tmp); | |
1145 | width++; | |
1146 | } else switch(*tmp) { | |
1147 | #if 0 | |
1148 | /* Should print \00 instead of \0 because (a) POSIX requires it, and | |
1149 | (b) this way \01 is unambiguous. */ | |
1150 | case '\0': | |
1151 | printf("\\0"); | |
1152 | width+=2; | |
1153 | break; | |
1154 | #endif | |
1155 | case 007: | |
1156 | printf("\\a"); | |
1157 | width+=2; | |
1158 | break; | |
1159 | case '\b': | |
1160 | printf("\\b"); | |
1161 | width+=2; | |
1162 | break; | |
1163 | case '\f': | |
1164 | printf("\\f"); | |
1165 | width+=2; | |
1166 | break; | |
1167 | case '\n': | |
1168 | printf("\\n"); | |
1169 | width+=2; | |
1170 | break; | |
1171 | case '\r': | |
1172 | printf("\\r"); | |
1173 | width+=2; | |
1174 | break; | |
1175 | case '\t': | |
1176 | printf("\\t"); | |
1177 | width+=2; | |
1178 | break; | |
1179 | case '\v': | |
1180 | printf("\\v"); | |
1181 | width+=2; | |
1182 | break; | |
1183 | default: | |
1184 | printf("\\%02x",(*tmp)&0xFF); | |
1185 | width+=2; | |
1186 | break; | |
1187 | } | |
1188 | tmp++; | |
1189 | } | |
1190 | putchar('\n'); | |
1191 | } | |
1192 | break; | |
1193 | ||
1194 | case 'n': | |
1195 | if (feof(input_file)) goto quit; | |
1196 | ck_fwrite(line.text,1,line.length,stdout); | |
1197 | read_pattern_space(); | |
1198 | break; | |
1199 | ||
1200 | case 'N': | |
1201 | if (feof(input_file)) goto quit; | |
1202 | append_pattern_space(); | |
1203 | break; | |
1204 | ||
1205 | case 'p': | |
1206 | ck_fwrite(line.text,1,line.length,stdout); | |
1207 | break; | |
1208 | ||
1209 | case 'P': | |
1210 | { | |
1211 | char *tmp; | |
1212 | ||
1213 | tmp=memchr(line.text,'\n',line.length); | |
1214 | ck_fwrite(line.text, 1, | |
1215 | tmp ? tmp - line.text + 1 | |
1216 | : line.length, stdout); | |
1217 | } | |
1218 | break; | |
1219 | ||
1220 | case 'q': quit: | |
1221 | quit_cmd++; | |
1222 | end_cycle++; | |
1223 | break; | |
1224 | ||
1225 | case 'r': | |
1226 | { | |
1227 | int n = 0; | |
1228 | ||
1229 | rewind(cur_cmd->x.io_file); | |
1230 | do { | |
1231 | append.length += n; | |
1232 | if(append.length==append.alloc) { | |
1233 | append.alloc *= 2; | |
1234 | append.text = ck_realloc(append.text, append.alloc); | |
1235 | } | |
1236 | } while((n=fread(append.text+append.length,sizeof(char),append.alloc-append.length,cur_cmd->x.io_file))>0); | |
1237 | if(ferror(cur_cmd->x.io_file)) | |
1238 | panic("Read error on input file to 'r' command"); | |
1239 | } | |
1240 | break; | |
1241 | ||
1242 | case 's': | |
1243 | if(!tmp.alloc) { | |
1244 | tmp.alloc=50; | |
1245 | tmp.text=ck_malloc(50); | |
1246 | } | |
1247 | count=0; | |
1248 | start = 0; | |
1249 | remain=line.length-1; | |
1250 | tmp.length=0; | |
1251 | rep = cur_cmd->x.cmd_regex.replacement; | |
1252 | rep_end=rep+cur_cmd->x.cmd_regex.replace_length; | |
1253 | ||
1254 | while((offset = re_search(cur_cmd->x.cmd_regex.regx, | |
1255 | line.text, | |
1256 | line.length-1, | |
1257 | start, | |
1258 | remain, | |
1259 | ®s))>=0) { | |
1260 | count++; | |
1261 | if(offset-start) | |
1262 | str_append(&tmp,line.text+start,offset-start); | |
1263 | ||
1264 | if(cur_cmd->x.cmd_regex.flags&S_NUM_BIT) { | |
1265 | if(count!=cur_cmd->x.cmd_regex.numb) { | |
1266 | str_append(&tmp,line.text+regs.start[0],regs.end[0]-regs.start[0]); | |
1267 | start = (offset == regs.end[0] ? offset + 1 : regs.end[0]); | |
1268 | remain = (line.length-1) - start; | |
1269 | continue; | |
1270 | } | |
1271 | } | |
1272 | ||
1273 | for(rep_next=rep_cur=rep;rep_next<rep_end;rep_next++) { | |
1274 | if(*rep_next=='&') { | |
1275 | if(rep_next-rep_cur) | |
1276 | str_append(&tmp,rep_cur,rep_next-rep_cur); | |
1277 | str_append(&tmp,line.text+regs.start[0],regs.end[0]-regs.start[0]); | |
1278 | rep_cur=rep_next+1; | |
1279 | } else if(*rep_next=='\\') { | |
1280 | if(rep_next-rep_cur) | |
1281 | str_append(&tmp,rep_cur,rep_next-rep_cur); | |
1282 | rep_next++; | |
1283 | if(rep_next!=rep_end) { | |
1284 | int n; | |
1285 | ||
1286 | if(*rep_next>='0' && *rep_next<='9') { | |
1287 | n= *rep_next -'0'; | |
1288 | str_append(&tmp,line.text+regs.start[n],regs.end[n]-regs.start[n]); | |
1289 | } else | |
1290 | str_append(&tmp,rep_next,1); | |
1291 | } | |
1292 | rep_cur=rep_next+1; | |
1293 | } | |
1294 | } | |
1295 | if(rep_next-rep_cur) | |
1296 | str_append(&tmp,rep_cur,rep_next-rep_cur); | |
1297 | if (offset == regs.end[0]) { | |
1298 | str_append(&tmp, line.text + offset, 1); | |
1299 | ++regs.end[0]; | |
1300 | } | |
1301 | start = regs.end[0]; | |
1302 | ||
1303 | remain = (line.length-1) - start; | |
1304 | if(remain<0) | |
1305 | break; | |
1306 | if(!(cur_cmd->x.cmd_regex.flags&S_GLOBAL_BIT)) | |
1307 | break; | |
1308 | } | |
1309 | if(!count) | |
1310 | break; | |
1311 | replaced=1; | |
1312 | str_append(&tmp,line.text+start,remain+1); | |
1313 | t.text=line.text; | |
1314 | t.length=line.length; | |
1315 | t.alloc=line.alloc; | |
1316 | line.text=tmp.text; | |
1317 | line.length=tmp.length; | |
1318 | line.alloc=tmp.alloc; | |
1319 | tmp.text=t.text; | |
1320 | tmp.length=t.length; | |
1321 | tmp.alloc=t.alloc; | |
1322 | if(cur_cmd->x.cmd_regex.flags&S_WRITE_BIT) | |
1323 | ck_fwrite(line.text,1,line.length,cur_cmd->x.cmd_regex.wio_file); | |
1324 | if(cur_cmd->x.cmd_regex.flags&S_PRINT_BIT) | |
1325 | ck_fwrite(line.text,1,line.length,stdout); | |
1326 | break; | |
1327 | ||
1328 | case 't': | |
1329 | if(replaced) { | |
1330 | replaced = 0; | |
1331 | if(!cur_cmd->x.jump) | |
1332 | end_cycle++; | |
1333 | else { | |
1334 | struct sed_label *j = cur_cmd->x.jump; | |
1335 | ||
1336 | n= j->v->v_length - j->v_index; | |
1337 | cur_cmd= j->v->v + j->v_index; | |
1338 | goto exe_loop; | |
1339 | } | |
1340 | } | |
1341 | break; | |
1342 | ||
1343 | case 'w': | |
1344 | ck_fwrite(line.text,1,line.length,cur_cmd->x.io_file); | |
1345 | break; | |
1346 | ||
1347 | case 'x': | |
1348 | { | |
1349 | struct line tmp; | |
1350 | ||
1351 | tmp=line; | |
1352 | line=hold; | |
1353 | hold=tmp; | |
1354 | } | |
1355 | break; | |
1356 | ||
1357 | case 'y': | |
1358 | { | |
1359 | unsigned char *p,*e; | |
1360 | ||
1361 | for(p=(unsigned char *)(line.text),e=p+line.length;p<e;p++) | |
1362 | *p=cur_cmd->x.translate[*p]; | |
1363 | } | |
1364 | break; | |
1365 | ||
1366 | default: | |
1367 | panic("INTERNAL ERROR: Bad cmd %c",cur_cmd->cmd); | |
1368 | } | |
1369 | if(end_cycle) | |
1370 | break; | |
1371 | } | |
1372 | } | |
1373 | ||
1374 | ||
1375 | /* Return non-zero if the current line matches the address | |
1376 | pointed to by 'addr'. */ | |
1377 | int | |
1378 | match_address(addr) | |
1379 | struct addr *addr; | |
1380 | { | |
1381 | switch(addr->addr_type) { | |
1382 | case ADDR_NULL: | |
1383 | return 1; | |
1384 | case ADDR_NUM: | |
1385 | return (input_line_number==addr->addr_number); | |
1386 | ||
1387 | case ADDR_REGEX: | |
1388 | return (re_search(addr->addr_regex, | |
1389 | line.text, | |
1390 | line.length-1, | |
1391 | 0, | |
1392 | line.length-1, | |
1393 | (struct re_registers *)0)>=0) ? 1 : 0; | |
1394 | ||
1395 | case ADDR_LAST: | |
1396 | return (input_EOF) ? 1 : 0; | |
1397 | ||
1398 | default: | |
1399 | panic("INTERNAL ERROR: bad address type"); | |
1400 | break; | |
1401 | } | |
1402 | return -1; | |
1403 | } | |
1404 | ||
1405 | /* Read in the next line of input, and store it in the | |
1406 | pattern space. Return non-zero if this is the last line of input */ | |
1407 | ||
1408 | int | |
1409 | read_pattern_space() | |
1410 | { | |
1411 | int n; | |
1412 | char *p; | |
1413 | int ch; | |
1414 | ||
1415 | p=line.text; | |
1416 | n=line.alloc; | |
1417 | ||
1418 | if(feof(input_file)) | |
1419 | return 0; | |
1420 | input_line_number++; | |
1421 | replaced=0; | |
1422 | for(;;) { | |
1423 | if(n==0) { | |
1424 | line.text=ck_realloc(line.text,line.alloc*2); | |
1425 | p=line.text+line.alloc; | |
1426 | n=line.alloc; | |
1427 | line.alloc*=2; | |
1428 | } | |
1429 | ch=getc(input_file); | |
1430 | if(ch==EOF) { | |
1431 | if(n==line.alloc) | |
1432 | return 0; | |
1433 | *p++='\n'; | |
1434 | --n; | |
1435 | line.length=line.alloc-n; | |
1436 | if(last_input_file) | |
1437 | input_EOF++; | |
1438 | return 1; | |
1439 | } | |
1440 | *p++=ch; | |
1441 | --n; | |
1442 | if(ch=='\n') { | |
1443 | line.length=line.alloc-n; | |
1444 | break; | |
1445 | } | |
1446 | } | |
1447 | ch=getc(input_file); | |
1448 | if(ch!=EOF) | |
1449 | ungetc(ch,input_file); | |
1450 | else if(last_input_file) | |
1451 | input_EOF++; | |
1452 | return 1; | |
1453 | } | |
1454 | ||
1455 | /* Inplement the 'N' command, which appends the next line of input to | |
1456 | the pattern space. */ | |
1457 | void | |
1458 | append_pattern_space() | |
1459 | { | |
1460 | char *p; | |
1461 | int n; | |
1462 | int ch; | |
1463 | ||
1464 | p=line.text+line.length; | |
1465 | n=line.alloc-line.length; | |
1466 | ||
1467 | input_line_number++; | |
1468 | replaced=0; | |
1469 | for(;;) { | |
1470 | ch=getc(input_file); | |
1471 | if(ch==EOF) { | |
1472 | if(n==line.alloc) | |
1473 | return; | |
1474 | *p++='\n'; | |
1475 | --n; | |
1476 | line.length=line.alloc-n; | |
1477 | if(last_input_file) | |
1478 | input_EOF++; | |
1479 | return; | |
1480 | } | |
1481 | *p++=ch; | |
1482 | --n; | |
1483 | if(ch=='\n') { | |
1484 | line.length=line.alloc-n; | |
1485 | break; | |
1486 | } | |
1487 | if(n==0) { | |
1488 | line.text=ck_realloc(line.text,line.alloc*2); | |
1489 | p=line.text+line.alloc; | |
1490 | n=line.alloc; | |
1491 | line.alloc*=2; | |
1492 | } | |
1493 | } | |
1494 | ch=getc(input_file); | |
1495 | if(ch!=EOF) | |
1496 | ungetc(ch,input_file); | |
1497 | else if(last_input_file) | |
1498 | input_EOF++; | |
1499 | } | |
1500 | ||
1501 | /* Copy the contents of the line 'from' into the line 'to'. | |
1502 | This destroys the old contents of 'to'. It will still work | |
1503 | if the line 'from' contains nulls. */ | |
1504 | void | |
1505 | line_copy(from,to) | |
1506 | struct line *from,*to; | |
1507 | { | |
1508 | if(from->length>to->alloc) { | |
1509 | to->alloc=from->length; | |
1510 | to->text=ck_realloc(to->text,to->alloc); | |
1511 | } | |
1512 | bcopy(from->text,to->text,from->length); | |
1513 | to->length=from->length; | |
1514 | } | |
1515 | ||
1516 | /* Append the contents of the line 'from' to the line 'to'. | |
1517 | This routine will work even if the line 'from' contains nulls */ | |
1518 | void | |
1519 | line_append(from,to) | |
1520 | struct line *from,*to; | |
1521 | { | |
1522 | if(from->length>(to->alloc-to->length)) { | |
1523 | to->alloc+=from->length; | |
1524 | to->text=ck_realloc(to->text,to->alloc); | |
1525 | } | |
1526 | bcopy(from->text,to->text+to->length,from->length); | |
1527 | to->length+=from->length; | |
1528 | } | |
1529 | ||
1530 | /* Append 'length' bytes from 'string' to the line 'to' | |
1531 | This routine *will* append bytes with nulls in them, without | |
1532 | failing. */ | |
1533 | void | |
1534 | str_append(to,string,length) | |
1535 | struct line *to; | |
1536 | char *string; | |
1537 | int length; | |
1538 | { | |
1539 | if(length>to->alloc-to->length) { | |
1540 | to->alloc+=length; | |
1541 | to->text=ck_realloc(to->text,to->alloc); | |
1542 | } | |
1543 | bcopy(string,to->text+to->length,length); | |
1544 | to->length+=length; | |
1545 | } | |
1546 | ||
1547 | void | |
1548 | usage() | |
1549 | { | |
1550 | fprintf(stderr, "\ | |
1551 | Usage: %s [-nV] [+quiet] [+silent] [+version] [-e script] [-f script-file]\n\ | |
1552 | [+expression=script] [+file=script-file] [file...]\n", myname); | |
1553 | exit(4); | |
1554 | } |