Commit | Line | Data |
---|---|---|
09d678d6 WJ |
1 | /* grep - print lines matching an extended regular expression |
2 | Copyright (C) 1988 Free Software Foundation, Inc. | |
3 | Written June, 1988 by Mike Haertel | |
4 | BMG speedups added July, 1988 | |
5 | by James A. Woods and Arthur David Olson | |
6 | ||
7 | NO WARRANTY | |
8 | ||
9 | BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY | |
10 | NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW. EXCEPT | |
11 | WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC, | |
12 | RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS" | |
13 | WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, | |
14 | BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND | |
15 | FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY | |
16 | AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE | |
17 | DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR | |
18 | CORRECTION. | |
19 | ||
20 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M. | |
21 | STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY | |
22 | WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE | |
23 | LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR | |
24 | OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE | |
25 | USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR | |
26 | DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR | |
27 | A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS | |
28 | PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH | |
29 | DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY. | |
30 | ||
31 | GENERAL PUBLIC LICENSE TO COPY | |
32 | ||
33 | 1. You may copy and distribute verbatim copies of this source file | |
34 | as you receive it, in any medium, provided that you conspicuously and | |
35 | appropriately publish on each copy a valid copyright notice "Copyright | |
36 | (C) 1988 Free Software Foundation, Inc."; and include following the | |
37 | copyright notice a verbatim copy of the above disclaimer of warranty | |
38 | and of this License. You may charge a distribution fee for the | |
39 | physical act of transferring a copy. | |
40 | ||
41 | 2. You may modify your copy or copies of this source file or | |
42 | any portion of it, and copy and distribute such modifications under | |
43 | the terms of Paragraph 1 above, provided that you also do the following: | |
44 | ||
45 | a) cause the modified files to carry prominent notices stating | |
46 | that you changed the files and the date of any change; and | |
47 | ||
48 | b) cause the whole of any work that you distribute or publish, | |
49 | that in whole or in part contains or is a derivative of this | |
50 | program or any part thereof, to be licensed at no charge to all | |
51 | third parties on terms identical to those contained in this | |
52 | License Agreement (except that you may choose to grant more extensive | |
53 | warranty protection to some or all third parties, at your option). | |
54 | ||
55 | c) You may charge a distribution fee for the physical act of | |
56 | transferring a copy, and you may at your option offer warranty | |
57 | protection in exchange for a fee. | |
58 | ||
59 | Mere aggregation of another unrelated program with this program (or its | |
60 | derivative) on a volume of a storage or distribution medium does not bring | |
61 | the other program under the scope of these terms. | |
62 | ||
63 | 3. You may copy and distribute this program or any portion of it in | |
64 | compiled, executable or object code form under the terms of Paragraphs | |
65 | 1 and 2 above provided that you do the following: | |
66 | ||
67 | a) accompany it with the complete corresponding machine-readable | |
68 | source code, which must be distributed under the terms of | |
69 | Paragraphs 1 and 2 above; or, | |
70 | ||
71 | b) accompany it with a written offer, valid for at least three | |
72 | years, to give any third party free (except for a nominal | |
73 | shipping charge) a complete machine-readable copy of the | |
74 | corresponding source code, to be distributed under the terms of | |
75 | Paragraphs 1 and 2 above; or, | |
76 | ||
77 | c) accompany it with the information you received as to where the | |
78 | corresponding source code may be obtained. (This alternative is | |
79 | allowed only for noncommercial distribution and only if you | |
80 | received the program in object code or executable form alone.) | |
81 | ||
82 | For an executable file, complete source code means all the source code for | |
83 | all modules it contains; but, as a special exception, it need not include | |
84 | source code for modules which are standard libraries that accompany the | |
85 | operating system on which the executable file runs. | |
86 | ||
87 | 4. You may not copy, sublicense, distribute or transfer this program | |
88 | except as expressly provided under this License Agreement. Any attempt | |
89 | otherwise to copy, sublicense, distribute or transfer this program is void and | |
90 | your rights to use the program under this License agreement shall be | |
91 | automatically terminated. However, parties who have received computer | |
92 | software programs from you with this License Agreement will not have | |
93 | their licenses terminated so long as such parties remain in full compliance. | |
94 | ||
95 | 5. If you wish to incorporate parts of this program into other free | |
96 | programs whose distribution conditions are different, write to the Free | |
97 | Software Foundation at 675 Mass Ave, Cambridge, MA 02139. We have not yet | |
98 | worked out a simple rule that can be stated here, but we will often permit | |
99 | this. We will be guided by the two goals of preserving the free status of | |
100 | all derivatives our free software and of promoting the sharing and reuse of | |
101 | software. | |
102 | ||
103 | ||
104 | In other words, you are welcome to use, share and improve this program. | |
105 | You are forbidden to forbid anyone else to use, share and improve | |
106 | what you give them. Help stamp out software-hoarding! */ | |
107 | \f | |
108 | #include <ctype.h> | |
109 | #include <stdio.h> | |
110 | #ifdef USG | |
111 | #include <memory.h> | |
112 | #include <string.h> | |
113 | #else | |
114 | #include <strings.h> | |
115 | #endif | |
116 | #include "dfa.h" | |
117 | #include "regex.h" | |
118 | ||
119 | #ifdef __STDC__ | |
120 | /*extern getopt(int, char **, const char *); | |
121 | extern read(int, void *, int); | |
122 | extern open(const char *, int, ...); | |
123 | extern void close();*/ | |
124 | #else | |
125 | extern char *strrchr(); | |
126 | #endif | |
127 | ||
128 | extern char *optarg; | |
129 | extern optind, opterr; | |
130 | extern errno; | |
131 | extern char *sys_errlist[]; | |
132 | ||
133 | #define MAX(a, b) ((a) > (b) ? (a) : (b)) | |
134 | ||
135 | /* Exit status codes. */ | |
136 | #define MATCHES_FOUND 0 /* Exit 0 if no errors and matches found. */ | |
137 | #define NO_MATCHES_FOUND 1 /* Exit 1 if no matches were found. */ | |
138 | #define ERROR 2 /* Exit 2 if some error occurred. */ | |
139 | ||
140 | /* Error is set true if something awful happened. */ | |
141 | static int error; | |
142 | ||
143 | /* The program name for error messages. */ | |
144 | static char *prog; | |
145 | ||
146 | /* We do all our own buffering by hand for efficiency. */ | |
147 | static char *buffer; /* The buffer itself, grown as needed. */ | |
148 | static bufbytes; /* Number of bytes in the buffer. */ | |
149 | static size_t bufalloc; /* Number of bytes allocated to the buffer. */ | |
150 | static bufprev; /* Number of bytes that have been forgotten. | |
151 | This is used to get byte offsets from the | |
152 | beginning of the file. */ | |
153 | static bufread; /* Number of bytes to get with each read(). */ | |
154 | ||
155 | static void | |
156 | initialize_buffer() | |
157 | { | |
158 | bufread = 8192; | |
159 | bufalloc = bufread + bufread / 2; | |
160 | buffer = malloc(bufalloc); | |
161 | if (! buffer) | |
162 | { | |
163 | fprintf(stderr, "%s: Memory exhausted (%s)\n", prog, | |
164 | sys_errlist[errno]); | |
165 | exit(ERROR); | |
166 | } | |
167 | } | |
168 | ||
169 | /* The current input file. */ | |
170 | static fd; | |
171 | static char *filename; | |
172 | static eof; | |
173 | ||
174 | /* Fill the buffer retaining the last n bytes at the beginning of the | |
175 | newly filled buffer (for backward context). Returns the number of new | |
176 | bytes read from disk. */ | |
177 | static | |
178 | fill_buffer_retaining(n) | |
179 | int n; | |
180 | { | |
181 | char *p, *q; | |
182 | int i; | |
183 | ||
184 | /* See if we need to grow the buffer. */ | |
185 | if (bufalloc - n <= bufread) | |
186 | { | |
187 | while (bufalloc - n <= bufread) | |
188 | { | |
189 | bufalloc *= 2; | |
190 | bufread *= 2; | |
191 | } | |
192 | buffer = realloc(buffer, bufalloc); | |
193 | if (! buffer) | |
194 | { | |
195 | fprintf(stderr, "%s: Memory exhausted (%s)\n", prog, | |
196 | sys_errlist[errno]); | |
197 | exit(ERROR); | |
198 | } | |
199 | } | |
200 | ||
201 | bufprev += bufbytes - n; | |
202 | ||
203 | /* Shift stuff down. */ | |
204 | for (i = n, p = buffer, q = p + bufbytes - n; i--; ) | |
205 | *p++ = *q++; | |
206 | bufbytes = n; | |
207 | ||
208 | if (eof) | |
209 | return 0; | |
210 | ||
211 | /* Read in new stuff. */ | |
212 | i = read(fd, buffer + bufbytes, bufread); | |
213 | if (i < 0) | |
214 | { | |
215 | fprintf(stderr, "%s: read on %s failed (%s)\n", prog, | |
216 | filename ? filename : "<stdin>", sys_errlist[errno]); | |
217 | error = 1; | |
218 | } | |
219 | ||
220 | /* Kludge to pretend every nonempty file ends with a newline. */ | |
221 | if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n') | |
222 | { | |
223 | eof = i = 1; | |
224 | buffer[bufbytes] = '\n'; | |
225 | } | |
226 | ||
227 | bufbytes += i; | |
228 | return i; | |
229 | } | |
230 | \f | |
231 | /* Various flags set according to the argument switches. */ | |
232 | static trailing_context; /* Lines of context to show after matches. */ | |
233 | static leading_context; /* Lines of context to show before matches. */ | |
234 | static byte_count; /* Precede output lines the byte count of the | |
235 | first character on the line. */ | |
236 | static no_filenames; /* Do not display filenames. */ | |
237 | static line_numbers; /* Precede output lines with line numbers. */ | |
238 | static silent; /* Produce no output at all. This switch | |
239 | is bogus, ever hear of /dev/null? */ | |
240 | static nonmatching_lines; /* Print lines that don't match the regexp. */ | |
241 | ||
242 | static bmgexec; /* Invoke Boyer-Moore-Gosper routines */ | |
243 | ||
244 | /* The compiled regular expression lives here. */ | |
245 | static struct regexp reg; | |
246 | ||
247 | /* The compiled regular expression for the backtracking matcher lives here. */ | |
248 | static struct re_pattern_buffer regex; | |
249 | ||
250 | /* Pointer in the buffer after the last character printed. */ | |
251 | static char *printed_limit; | |
252 | ||
253 | /* True when printed_limit has been artifically advanced without printing | |
254 | anything. */ | |
255 | static int printed_limit_fake; | |
256 | ||
257 | /* Print a line at the given line number, returning the number of | |
258 | characters actually printed. Matching is true if the line is to | |
259 | be considered a "matching line". This is only meaningful if | |
260 | surrounding context is turned on. */ | |
261 | static | |
262 | print_line(p, number, matching) | |
263 | char *p; | |
264 | int number; | |
265 | int matching; | |
266 | { | |
267 | int count = 0; | |
268 | ||
269 | if (silent) | |
270 | { | |
271 | do | |
272 | ++count; | |
273 | while (*p++ != '\n'); | |
274 | printed_limit_fake = 0; | |
275 | printed_limit = p; | |
276 | return count; | |
277 | } | |
278 | ||
279 | if (filename && !no_filenames) | |
280 | printf("%s%c", filename, matching ? ':' : '-'); | |
281 | if (byte_count) | |
282 | printf("%d%c", p - buffer + bufprev, matching ? ':' : '-'); | |
283 | if (line_numbers) | |
284 | printf("%d%c", number, matching ? ':' : '-'); | |
285 | do | |
286 | { | |
287 | ++count; | |
288 | putchar(*p); | |
289 | } | |
290 | while (*p++ != '\n'); | |
291 | printed_limit_fake = 0; | |
292 | printed_limit = p; | |
293 | return count; | |
294 | } | |
295 | ||
296 | /* Print matching or nonmatching lines from the current file. Returns a | |
297 | count of matching or nonmatching lines. */ | |
298 | static | |
299 | grep() | |
300 | { | |
301 | int retain = 0; /* Number of bytes to retain on next call | |
302 | to fill_buffer_retaining(). */ | |
303 | char *search_limit; /* Pointer to the character after the last | |
304 | newline in the buffer. */ | |
305 | char saved_char; /* Character after the last newline. */ | |
306 | char *resume; /* Pointer to where to resume search. */ | |
307 | int resume_index = 0; /* Count of characters to ignore after | |
308 | refilling the buffer. */ | |
309 | int line_count = 1; /* Line number. */ | |
310 | int try_backref; /* Set to true if we need to verify the | |
311 | match with a backtracking matcher. */ | |
312 | int initial_line_count; /* Line count at beginning of last search. */ | |
313 | char *match; /* Pointer to the first character after the | |
314 | string matching the regexp. */ | |
315 | int match_count = 0; /* Count of matching lines. */ | |
316 | char *matching_line; /* Pointer to first character of the matching | |
317 | line, or of the first line of context to | |
318 | print if context is turned on. */ | |
319 | char *real_matching_line; /* Pointer to the first character of the | |
320 | real matching line. */ | |
321 | char *next_line; /* Pointer to first character of the line | |
322 | following the matching line. */ | |
323 | int pending_lines = 0; /* Lines of context left over from last match | |
324 | that we have to print. */ | |
325 | static first_match = 1; /* True when nothing has been printed. */ | |
326 | int i; | |
327 | char *tmp; | |
328 | char *execute(); | |
329 | ||
330 | printed_limit_fake = 0; | |
331 | ||
332 | while (fill_buffer_retaining(retain) > 0) | |
333 | { | |
334 | /* Find the last newline in the buffer. */ | |
335 | search_limit = buffer + bufbytes; | |
336 | while (search_limit > buffer && search_limit[-1] != '\n') | |
337 | --search_limit; | |
338 | if (search_limit == buffer) | |
339 | { | |
340 | retain = bufbytes; | |
341 | continue; | |
342 | } | |
343 | ||
344 | /* Save the character after the last newline so regexecute can write | |
345 | its own sentinel newline. */ | |
346 | saved_char = *search_limit; | |
347 | ||
348 | /* Search the buffer for a match. */ | |
349 | printed_limit = buffer; | |
350 | resume = buffer + resume_index; | |
351 | initial_line_count = line_count; | |
352 | ||
353 | while (match = execute(®, resume, search_limit, 0, &line_count, &try_backref)) | |
354 | { | |
355 | ++match_count; | |
356 | ||
357 | /* Find the beginning of the matching line. */ | |
358 | matching_line = match; | |
359 | while (matching_line > resume && matching_line[-1] != '\n') | |
360 | --matching_line; | |
361 | real_matching_line = matching_line; | |
362 | ||
363 | /* Find the beginning of the next line. */ | |
364 | next_line = match; | |
365 | while (next_line < search_limit && *next_line++ != '\n') | |
366 | ; | |
367 | ||
368 | /* If a potential backreference is indicated, try it out with | |
369 | a backtracking matcher to make sure the line is a match. */ | |
370 | if (try_backref && re_search(®ex, matching_line, | |
371 | next_line - matching_line - 1, | |
372 | 0, | |
373 | next_line - matching_line - 1, | |
374 | NULL) < 0) | |
375 | { | |
376 | resume = next_line; | |
377 | if (resume == search_limit) | |
378 | break; | |
379 | else | |
380 | continue; | |
381 | } | |
382 | ||
383 | /* Print leftover lines from last time. If nonmatching_lines is | |
384 | turned on, print these as if they were matching lines. */ | |
385 | while (resume < matching_line && pending_lines) | |
386 | { | |
387 | resume += print_line(resume, initial_line_count++, | |
388 | nonmatching_lines); | |
389 | --pending_lines; | |
390 | } | |
391 | ||
392 | /* Print out the matching or nonmatching lines as necessary. */ | |
393 | if (! nonmatching_lines) | |
394 | { | |
395 | /* Back up over leading context if necessary. */ | |
396 | for (i = leading_context; matching_line > printed_limit | |
397 | && i; --i) | |
398 | { | |
399 | while (matching_line > printed_limit | |
400 | && (--matching_line)[-1] != '\n') | |
401 | ; | |
402 | --line_count; | |
403 | } | |
404 | ||
405 | /* If context is enabled, we may have to print a separator. */ | |
406 | if ((leading_context || trailing_context) && !silent | |
407 | && !first_match && (printed_limit_fake || matching_line | |
408 | > printed_limit)) | |
409 | printf("----------\n"); | |
410 | first_match = 0; | |
411 | ||
412 | /* Print the matching line and its leading context. */ | |
413 | while (matching_line < real_matching_line) | |
414 | matching_line += print_line(matching_line, line_count++, 0); | |
415 | matching_line += print_line(matching_line, line_count++, 1); | |
416 | ||
417 | /* If there's trailing context, leave some lines pending until | |
418 | next time. */ | |
419 | pending_lines = trailing_context; | |
420 | } | |
421 | else if (matching_line > resume) | |
422 | { | |
423 | char *real_resume = resume; | |
424 | ||
425 | /* Back up over leading context if necessary. */ | |
426 | for (i = leading_context; resume > printed_limit && i; --i) | |
427 | { | |
428 | while (resume > printed_limit && (--resume)[-1] != '\n') | |
429 | ; | |
430 | --initial_line_count; | |
431 | } | |
432 | ||
433 | /* If context is enabled, we may have to print a separator. */ | |
434 | if ((leading_context || trailing_context) && !silent | |
435 | && !first_match && (printed_limit_fake || resume | |
436 | > printed_limit)) | |
437 | printf("----------\n"); | |
438 | first_match = 0; | |
439 | ||
440 | /* Print out the presumably matching leading context. */ | |
441 | while (resume < real_resume) | |
442 | resume += print_line(resume, initial_line_count++, 0); | |
443 | ||
444 | /* Print out the nonmatching lines prior to the matching line. */ | |
445 | while (resume < matching_line) | |
446 | resume += print_line(resume, initial_line_count++, 1); | |
447 | ||
448 | /* Deal with trailing context. */ | |
449 | if (trailing_context) | |
450 | { | |
451 | print_line(matching_line, line_count, 0); | |
452 | pending_lines = trailing_context - 1; | |
453 | } | |
454 | ||
455 | /* Count the current line. */ | |
456 | ++line_count; | |
457 | } | |
458 | else | |
459 | { | |
460 | /* The line immediately after a matching line has to be printed | |
461 | because it was pending. */ | |
462 | if (pending_lines > 0) | |
463 | { | |
464 | --pending_lines; | |
465 | print_line(matching_line, line_count, 0); | |
466 | } | |
467 | ++line_count; | |
468 | } | |
469 | ||
470 | /* Resume searching at the beginning of the next line. */ | |
471 | initial_line_count = line_count; | |
472 | resume = next_line; | |
473 | ||
474 | if (resume == search_limit) | |
475 | break; | |
476 | } | |
477 | ||
478 | /* Restore the saved character. */ | |
479 | *search_limit = saved_char; | |
480 | ||
481 | if (! nonmatching_lines) | |
482 | { | |
483 | while (resume < search_limit && pending_lines) | |
484 | { | |
485 | resume += print_line(resume, initial_line_count++, 0); | |
486 | --pending_lines; | |
487 | } | |
488 | } | |
489 | else if (search_limit > resume) | |
490 | { | |
491 | char *initial_resume = resume; | |
492 | ||
493 | /* Back up over leading context if necessary. */ | |
494 | for (i = leading_context; resume > printed_limit && i; --i) | |
495 | { | |
496 | while (resume > printed_limit && (--resume)[-1] != '\n') | |
497 | ; | |
498 | --initial_line_count; | |
499 | } | |
500 | ||
501 | /* If context is enabled, we may have to print a separator. */ | |
502 | if ((leading_context || trailing_context) && !silent | |
503 | && !first_match && (printed_limit_fake || resume | |
504 | > printed_limit)) | |
505 | printf("----------\n"); | |
506 | first_match = 0; | |
507 | ||
508 | /* Print out all the nonmatching lines up to the search limit. */ | |
509 | while (resume < initial_resume) | |
510 | resume += print_line(resume, initial_line_count++, 0); | |
511 | while (resume < search_limit) | |
512 | resume += print_line(resume, initial_line_count++, 1); | |
513 | ||
514 | pending_lines = trailing_context; | |
515 | resume_index = 0; | |
516 | retain = bufbytes - (search_limit - buffer); | |
517 | continue; | |
518 | } | |
519 | ||
520 | /* Save the trailing end of the buffer for possible use as leading | |
521 | context in the future. */ | |
522 | i = leading_context; | |
523 | tmp = search_limit; | |
524 | while (tmp > printed_limit && i--) | |
525 | while (tmp > printed_limit && (--tmp)[-1] != '\n') | |
526 | ; | |
527 | resume_index = search_limit - tmp; | |
528 | retain = bufbytes - (tmp - buffer); | |
529 | if (tmp > printed_limit) | |
530 | printed_limit_fake = 1; | |
531 | } | |
532 | ||
533 | return nonmatching_lines ? (line_count - 1) - match_count : match_count; | |
534 | } | |
535 | \f | |
536 | void | |
537 | usage_and_die() | |
538 | { | |
539 | fprintf(stderr, | |
540 | "usage: %s [-CVbchilnsvwx] [-<num>] [-AB <num>] [-f file] [-e] expr [files]\n", | |
541 | prog); | |
542 | exit(ERROR); | |
543 | } | |
544 | ||
545 | static char version[] = "GNU e?grep, version 1.5"; | |
546 | ||
547 | main(argc, argv) | |
548 | int argc; | |
549 | char **argv; | |
550 | { | |
551 | int c; | |
552 | int ignore_case = 0; /* Compile the regexp to ignore case. */ | |
553 | char *the_regexp = 0; /* The regular expression. */ | |
554 | int regexp_len; /* Length of the regular expression. */ | |
555 | char *regexp_file = 0; /* File containing parallel regexps. */ | |
556 | int count_lines = 0; /* Display only a count of matching lines. */ | |
557 | int list_files = 0; /* Display only the names of matching files. */ | |
558 | int whole_word = 0; /* Insist that the regexp match a word only. */ | |
559 | int whole_line = 0; /* Insist on matching only whole lines. */ | |
560 | int line_count = 0; /* Count of matching lines for a file. */ | |
561 | int matches_found = 0; /* True if matches were found. */ | |
562 | char *regex_errmesg; /* Error message from regex routines. */ | |
563 | char translate[_NOTCHAR]; /* Translate table for case conversion | |
564 | (needed by the backtracking matcher). */ | |
565 | ||
566 | if (prog = strrchr(argv[0], '/')) | |
567 | ++prog; | |
568 | else | |
569 | prog = argv[0]; | |
570 | ||
571 | opterr = 0; | |
572 | while ((c = getopt(argc, argv, "0123456789A:B:CVbce:f:hilnsvwx")) != EOF) | |
573 | switch (c) | |
574 | { | |
575 | case '?': | |
576 | usage_and_die(); | |
577 | break; | |
578 | ||
579 | case '0': | |
580 | case '1': | |
581 | case '2': | |
582 | case '3': | |
583 | case '4': | |
584 | case '5': | |
585 | case '6': | |
586 | case '7': | |
587 | case '8': | |
588 | case '9': | |
589 | trailing_context = 10 * trailing_context + c - '0'; | |
590 | leading_context = 10 * leading_context + c - '0'; | |
591 | break; | |
592 | ||
593 | case 'A': | |
594 | if (! sscanf(optarg, "%d", &trailing_context) | |
595 | || trailing_context < 0) | |
596 | usage_and_die(); | |
597 | break; | |
598 | ||
599 | case 'B': | |
600 | if (! sscanf(optarg, "%d", &leading_context) | |
601 | || leading_context < 0) | |
602 | usage_and_die(); | |
603 | break; | |
604 | ||
605 | case 'C': | |
606 | trailing_context = leading_context = 2; | |
607 | break; | |
608 | ||
609 | case 'V': | |
610 | fprintf(stderr, "%s\n", version); | |
611 | break; | |
612 | ||
613 | case 'b': | |
614 | byte_count = 1; | |
615 | break; | |
616 | ||
617 | case 'c': | |
618 | count_lines = 1; | |
619 | silent = 1; | |
620 | break; | |
621 | ||
622 | case 'e': | |
623 | /* It doesn't make sense to mix -f and -e. */ | |
624 | if (regexp_file) | |
625 | usage_and_die(); | |
626 | the_regexp = optarg; | |
627 | break; | |
628 | ||
629 | case 'f': | |
630 | /* It doesn't make sense to mix -f and -e. */ | |
631 | if (the_regexp) | |
632 | usage_and_die(); | |
633 | regexp_file = optarg; | |
634 | break; | |
635 | ||
636 | case 'h': | |
637 | no_filenames = 1; | |
638 | break; | |
639 | ||
640 | case 'i': | |
641 | ignore_case = 1; | |
642 | for (c = 0; c < _NOTCHAR; ++c) | |
643 | if (isupper(c)) | |
644 | translate[c] = tolower(c); | |
645 | else | |
646 | translate[c] = c; | |
647 | regex.translate = translate; | |
648 | break; | |
649 | ||
650 | case 'l': | |
651 | list_files = 1; | |
652 | silent = 1; | |
653 | break; | |
654 | ||
655 | case 'n': | |
656 | line_numbers = 1; | |
657 | break; | |
658 | ||
659 | case 's': | |
660 | silent = 1; | |
661 | break; | |
662 | ||
663 | case 'v': | |
664 | nonmatching_lines = 1; | |
665 | break; | |
666 | ||
667 | case 'w': | |
668 | whole_word = 1; | |
669 | break; | |
670 | ||
671 | case 'x': | |
672 | whole_line = 1; | |
673 | break; | |
674 | ||
675 | default: | |
676 | /* This can't happen. */ | |
677 | fprintf(stderr, "%s: getopt(3) let one by!\n", prog); | |
678 | usage_and_die(); | |
679 | break; | |
680 | } | |
681 | ||
682 | /* Set the syntax depending on whether we are EGREP or not. */ | |
683 | #ifdef EGREP | |
684 | regsyntax(RE_SYNTAX_EGREP, ignore_case); | |
685 | re_set_syntax(RE_SYNTAX_EGREP); | |
686 | #else | |
687 | regsyntax(RE_SYNTAX_GREP, ignore_case); | |
688 | re_set_syntax(RE_SYNTAX_GREP); | |
689 | #endif | |
690 | ||
691 | /* Compile the regexp according to all the options. */ | |
692 | if (regexp_file) | |
693 | { | |
694 | FILE *fp = fopen(regexp_file, "r"); | |
695 | int len = 256; | |
696 | int i = 0; | |
697 | ||
698 | if (! fp) | |
699 | { | |
700 | fprintf(stderr, "%s: %s: %s\n", prog, regexp_file, | |
701 | sys_errlist[errno]); | |
702 | exit(ERROR); | |
703 | } | |
704 | ||
705 | the_regexp = malloc(len); | |
706 | while ((c = getc(fp)) != EOF) | |
707 | { | |
708 | the_regexp[i++] = c; | |
709 | if (i == len) | |
710 | the_regexp = realloc(the_regexp, len *= 2); | |
711 | } | |
712 | fclose(fp); | |
713 | /* Nuke the concluding newline so we won't match the empty string. */ | |
714 | if (i > 0 && the_regexp[i - 1] == '\n') | |
715 | --i; | |
716 | regexp_len = i; | |
717 | } | |
718 | else if (! the_regexp) | |
719 | { | |
720 | if (optind >= argc) | |
721 | usage_and_die(); | |
722 | the_regexp = argv[optind++]; | |
723 | regexp_len = strlen(the_regexp); | |
724 | } | |
725 | else | |
726 | regexp_len = strlen(the_regexp); | |
727 | ||
728 | if (whole_word || whole_line) | |
729 | { | |
730 | char *n = malloc(regexp_len + 8); | |
731 | int i = 0; | |
732 | ||
733 | if (whole_line) | |
734 | n[i++] = '^'; | |
735 | else | |
736 | n[i++] = '\\', n[i++] = '<'; | |
737 | #ifndef EGREP | |
738 | n[i++] = '\\'; | |
739 | #endif | |
740 | n[i++] = '('; | |
741 | memcpy(n + i, the_regexp, regexp_len); | |
742 | i += regexp_len; | |
743 | #ifndef EGREP | |
744 | n[i++] = '\\'; | |
745 | #endif | |
746 | n[i++] = ')'; | |
747 | if (whole_line) | |
748 | n[i++] = '$'; | |
749 | else | |
750 | n[i++] = '\\', n[i++] = '>'; | |
751 | the_regexp = n; | |
752 | regexp_len = i; | |
753 | } | |
754 | ||
755 | regcompile(the_regexp, regexp_len, ®, 1); | |
756 | ||
757 | if (regex_errmesg = re_compile_pattern(the_regexp, regexp_len, ®ex)) | |
758 | regerror(regex_errmesg); | |
759 | ||
760 | /* | |
761 | Find the longest metacharacter-free string which must occur in the | |
762 | regexpr, before short-circuiting regexecute() with Boyer-Moore-Gosper. | |
763 | (Conjecture: The problem in general is NP-complete.) If there is no | |
764 | such string (like for many alternations), then default to full automaton | |
765 | search. regmust() code and heuristics [see dfa.c] courtesy | |
766 | Arthur David Olson. | |
767 | */ | |
768 | if (line_numbers == 0 && nonmatching_lines == 0) | |
769 | { | |
770 | if (reg.mustn == 0 || reg.mustn == MUST_MAX || | |
771 | strchr(reg.must, '\0') != reg.must + reg.mustn) | |
772 | bmgexec = 0; | |
773 | else | |
774 | { | |
775 | reg.must[reg.mustn] = '\0'; | |
776 | if (getenv("MUSTDEBUG") != NULL) | |
777 | (void) printf("must have: \"%s\"\n", reg.must); | |
778 | bmg_setup(reg.must, ignore_case); | |
779 | bmgexec = 1; | |
780 | } | |
781 | } | |
782 | ||
783 | if (argc - optind < 2) | |
784 | no_filenames = 1; | |
785 | ||
786 | initialize_buffer(); | |
787 | ||
788 | if (argc > optind) | |
789 | while (optind < argc) | |
790 | { | |
791 | bufprev = eof = 0; | |
792 | filename = argv[optind++]; | |
793 | fd = open(filename, 0, 0); | |
794 | if (fd < 0) | |
795 | { | |
796 | fprintf(stderr, "%s: %s: %s\n", prog, filename, | |
797 | sys_errlist[errno]); | |
798 | error = 1; | |
799 | continue; | |
800 | } | |
801 | if (line_count = grep()) | |
802 | matches_found = 1; | |
803 | close(fd); | |
804 | if (count_lines) | |
805 | if (!no_filenames) | |
806 | printf("%s:%d\n", filename, line_count); | |
807 | else | |
808 | printf("%d\n", line_count); | |
809 | else if (list_files && line_count) | |
810 | printf("%s\n", filename); | |
811 | } | |
812 | else | |
813 | { | |
814 | if (line_count = grep()) | |
815 | matches_found = 1; | |
816 | if (count_lines) | |
817 | printf("%d\n", line_count); | |
818 | else if (list_files && line_count) | |
819 | printf("<stdin>\n"); | |
820 | } | |
821 | ||
822 | if (error) | |
823 | exit(ERROR); | |
824 | if (matches_found) | |
825 | exit(MATCHES_FOUND); | |
826 | exit(NO_MATCHES_FOUND); | |
827 | } | |
828 | ||
829 | /* Needed by the regexp routines. This could be fancier, especially when | |
830 | dealing with parallel regexps in files. */ | |
831 | void | |
832 | regerror(s) | |
833 | const char *s; | |
834 | { | |
835 | fprintf(stderr, "%s: %s\n", prog, s); | |
836 | exit(ERROR); | |
837 | } | |
838 | ||
839 | /* | |
840 | bmg_setup() and bmg_search() adapted from: | |
841 | Boyer/Moore/Gosper-assisted 'egrep' search, with delta0 table as in | |
842 | original paper (CACM, October, 1977). No delta1 or delta2. According to | |
843 | experiment (Horspool, Soft. Prac. Exp., 1982), delta2 is of minimal | |
844 | practical value. However, to improve for worst case input, integrating | |
845 | the improved Galil strategies (Apostolico/Giancarlo, Siam. J. Comput., | |
846 | February 1986) deserves consideration. | |
847 | ||
848 | James A. Woods Copyleft (C) 1986, 1988 | |
849 | NASA Ames Research Center | |
850 | */ | |
851 | ||
852 | char * | |
853 | execute(r, begin, end, newline, count, try_backref) | |
854 | struct regexp *r; | |
855 | char *begin; | |
856 | char *end; | |
857 | int newline; | |
858 | int *count; | |
859 | int *try_backref; | |
860 | { | |
861 | register char *p, *s; | |
862 | char *match; | |
863 | char *start = begin; | |
864 | char save; /* regexecute() sentinel */ | |
865 | int len; | |
866 | char *bmg_search(); | |
867 | ||
868 | if (!bmgexec) /* full automaton search */ | |
869 | return(regexecute(r, begin, end, newline, count, try_backref)); | |
870 | else | |
871 | { | |
872 | len = end - begin; | |
873 | while ((match = bmg_search((unsigned char *) start, len)) != NULL) | |
874 | { | |
875 | p = match; /* narrow search range to submatch line */ | |
876 | while (p > begin && *p != '\n') | |
877 | p--; | |
878 | s = match; | |
879 | while (s < end && *s != '\n') | |
880 | s++; | |
881 | s++; | |
882 | ||
883 | save = *s; | |
884 | *s = '\0'; | |
885 | match = regexecute(r, p, s, newline, count, try_backref); | |
886 | *s = save; | |
887 | ||
888 | if (match != NULL) | |
889 | return((char *) match); | |
890 | else | |
891 | { | |
892 | start = s; | |
893 | len = end - start; | |
894 | } | |
895 | } | |
896 | return(NULL); | |
897 | } | |
898 | } | |
899 | ||
900 | #include <ctype.h> | |
901 | int delta0[256]; | |
902 | unsigned char cmap[256]; /* (un)folded characters */ | |
903 | unsigned char pattern[5000]; | |
904 | int patlen; | |
905 | ||
906 | char * | |
907 | bmg_search(buffer, buflen) | |
908 | unsigned char *buffer; | |
909 | int buflen; | |
910 | { | |
911 | register unsigned char *k, *strend, *s, *buflim; | |
912 | register int t; | |
913 | int j; | |
914 | ||
915 | if (patlen > buflen) | |
916 | return NULL; | |
917 | ||
918 | buflim = buffer + buflen; | |
919 | if (buflen > patlen * 4) | |
920 | strend = buflim - patlen * 4; | |
921 | else | |
922 | strend = buffer; | |
923 | ||
924 | s = buffer; | |
925 | k = buffer + patlen - 1; | |
926 | ||
927 | for (;;) | |
928 | { | |
929 | /* The dreaded inner loop, revisited. */ | |
930 | while (k < strend && (t = delta0[*k])) | |
931 | { | |
932 | k += t; | |
933 | k += delta0[*k]; | |
934 | k += delta0[*k]; | |
935 | } | |
936 | while (k < buflim && delta0[*k]) | |
937 | ++k; | |
938 | if (k == buflim) | |
939 | break; | |
940 | ||
941 | j = patlen - 1; | |
942 | s = k; | |
943 | while (--j >= 0 && cmap[*--s] == pattern[j]) | |
944 | ; | |
945 | /* | |
946 | delta-less shortcut for literati, but | |
947 | short shrift for genetic engineers. | |
948 | */ | |
949 | if (j >= 0) | |
950 | k++; | |
951 | else /* submatch */ | |
952 | return ((char *)k); | |
953 | } | |
954 | return(NULL); | |
955 | } | |
956 | ||
957 | bmg_setup(pat, folded) /* compute "boyer-moore" delta table */ | |
958 | char *pat; | |
959 | int folded; | |
960 | { /* ... HAKMEM lives ... */ | |
961 | int j; | |
962 | ||
963 | patlen = strlen(pat); | |
964 | ||
965 | if (folded) /* fold case while saving pattern */ | |
966 | for (j = 0; j < patlen; j++) | |
967 | pattern[j] = (isupper((int) pat[j]) ? | |
968 | (char) tolower((int) pat[j]) : pat[j]); | |
969 | else | |
970 | memcpy(pattern, pat, patlen); | |
971 | ||
972 | for (j = 0; j < 256; j++) | |
973 | { | |
974 | delta0[j] = patlen; | |
975 | cmap[j] = (char) j; /* could be done at compile time */ | |
976 | } | |
977 | for (j = 0; j < patlen - 1; j++) | |
978 | delta0[pattern[j]] = patlen - j - 1; | |
979 | delta0[pattern[patlen - 1]] = 0; | |
980 | ||
981 | if (folded) | |
982 | { | |
983 | for (j = 0; j < patlen - 1; j++) | |
984 | if (islower((int) pattern[j])) | |
985 | delta0[toupper((int) pattern[j])] = patlen - j - 1; | |
986 | if (islower((int) pattern[patlen - 1])) | |
987 | delta0[toupper((int) pattern[patlen - 1])] = 0; | |
988 | for (j = 'A'; j <= 'Z'; j++) | |
989 | cmap[j] = (char) tolower((int) j); | |
990 | } | |
991 | } | |
992 | ||
993 | #ifdef nope | |
994 | #ifndef USG | |
995 | ||
996 | /* (groan) compatibility */ | |
997 | ||
998 | char * | |
999 | strchr(s, c) | |
1000 | char *s; | |
1001 | { | |
1002 | return index(s, c); | |
1003 | } | |
1004 | ||
1005 | char * | |
1006 | strrchr(s, c) | |
1007 | char *s; | |
1008 | { | |
1009 | return rindex(s, c); | |
1010 | } | |
1011 | ||
1012 | char * | |
1013 | memcpy(d, s, n) | |
1014 | char *d, *s; | |
1015 | { | |
1016 | return bcopy(s, d, n); | |
1017 | } | |
1018 | ||
1019 | #else | |
1020 | ||
1021 | char * | |
1022 | index(s, c) | |
1023 | char *s; | |
1024 | { | |
1025 | return strchr(s, c); | |
1026 | } | |
1027 | ||
1028 | char * | |
1029 | bcopy(s, d, n) | |
1030 | char *s, *d; | |
1031 | { | |
1032 | return memcpy(d, s, n); | |
1033 | } | |
1034 | ||
1035 | char * | |
1036 | bzero(s, n) | |
1037 | char *s; | |
1038 | { | |
1039 | return memset(s, 0, n); | |
1040 | } | |
1041 | ||
1042 | bcmp(s, t, n) | |
1043 | char *s, *t; | |
1044 | { | |
1045 | return memcmp(s, t, n); | |
1046 | } | |
1047 | ||
1048 | #endif | |
1049 | #endif |