386BSD 0.1 development
[unix-history] / usr / src / usr.bin / grep / grep.c
CommitLineData
09d678d6
WJ
1/* grep - print lines matching an extended regular expression
2 Copyright (C) 1988 Free Software Foundation, Inc.
3 Written June, 1988 by Mike Haertel
4 BMG speedups added July, 1988
5 by James A. Woods and Arthur David Olson
6
7 NO WARRANTY
8
9 BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY
10NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW. EXCEPT
11WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC,
12RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS"
13WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
14BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
15FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY
16AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
17DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
18CORRECTION.
19
20 IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M.
21STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY
22WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE
23LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR
24OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
25USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR
26DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR
27A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS
28PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
29DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
30
31 GENERAL PUBLIC LICENSE TO COPY
32
33 1. You may copy and distribute verbatim copies of this source file
34as you receive it, in any medium, provided that you conspicuously and
35appropriately publish on each copy a valid copyright notice "Copyright
36 (C) 1988 Free Software Foundation, Inc."; and include following the
37copyright notice a verbatim copy of the above disclaimer of warranty
38and of this License. You may charge a distribution fee for the
39physical act of transferring a copy.
40
41 2. You may modify your copy or copies of this source file or
42any portion of it, and copy and distribute such modifications under
43the terms of Paragraph 1 above, provided that you also do the following:
44
45 a) cause the modified files to carry prominent notices stating
46 that you changed the files and the date of any change; and
47
48 b) cause the whole of any work that you distribute or publish,
49 that in whole or in part contains or is a derivative of this
50 program or any part thereof, to be licensed at no charge to all
51 third parties on terms identical to those contained in this
52 License Agreement (except that you may choose to grant more extensive
53 warranty protection to some or all third parties, at your option).
54
55 c) You may charge a distribution fee for the physical act of
56 transferring a copy, and you may at your option offer warranty
57 protection in exchange for a fee.
58
59Mere aggregation of another unrelated program with this program (or its
60derivative) on a volume of a storage or distribution medium does not bring
61the other program under the scope of these terms.
62
63 3. You may copy and distribute this program or any portion of it in
64compiled, executable or object code form under the terms of Paragraphs
651 and 2 above provided that you do the following:
66
67 a) accompany it with the complete corresponding machine-readable
68 source code, which must be distributed under the terms of
69 Paragraphs 1 and 2 above; or,
70
71 b) accompany it with a written offer, valid for at least three
72 years, to give any third party free (except for a nominal
73 shipping charge) a complete machine-readable copy of the
74 corresponding source code, to be distributed under the terms of
75 Paragraphs 1 and 2 above; or,
76
77 c) accompany it with the information you received as to where the
78 corresponding source code may be obtained. (This alternative is
79 allowed only for noncommercial distribution and only if you
80 received the program in object code or executable form alone.)
81
82For an executable file, complete source code means all the source code for
83all modules it contains; but, as a special exception, it need not include
84source code for modules which are standard libraries that accompany the
85operating system on which the executable file runs.
86
87 4. You may not copy, sublicense, distribute or transfer this program
88except as expressly provided under this License Agreement. Any attempt
89otherwise to copy, sublicense, distribute or transfer this program is void and
90your rights to use the program under this License agreement shall be
91automatically terminated. However, parties who have received computer
92software programs from you with this License Agreement will not have
93their licenses terminated so long as such parties remain in full compliance.
94
95 5. If you wish to incorporate parts of this program into other free
96programs whose distribution conditions are different, write to the Free
97Software Foundation at 675 Mass Ave, Cambridge, MA 02139. We have not yet
98worked out a simple rule that can be stated here, but we will often permit
99this. We will be guided by the two goals of preserving the free status of
100all derivatives our free software and of promoting the sharing and reuse of
101software.
102
103
104In other words, you are welcome to use, share and improve this program.
105You are forbidden to forbid anyone else to use, share and improve
106what you give them. Help stamp out software-hoarding! */
107\f
108#include <ctype.h>
109#include <stdio.h>
110#ifdef USG
111#include <memory.h>
112#include <string.h>
113#else
114#include <strings.h>
115#endif
116#include "dfa.h"
117#include "regex.h"
118
119#ifdef __STDC__
120/*extern getopt(int, char **, const char *);
121extern read(int, void *, int);
122extern open(const char *, int, ...);
123extern void close();*/
124#else
125extern char *strrchr();
126#endif
127
128extern char *optarg;
129extern optind, opterr;
130extern errno;
131extern char *sys_errlist[];
132
133#define MAX(a, b) ((a) > (b) ? (a) : (b))
134
135/* Exit status codes. */
136#define MATCHES_FOUND 0 /* Exit 0 if no errors and matches found. */
137#define NO_MATCHES_FOUND 1 /* Exit 1 if no matches were found. */
138#define ERROR 2 /* Exit 2 if some error occurred. */
139
140/* Error is set true if something awful happened. */
141static int error;
142
143/* The program name for error messages. */
144static char *prog;
145
146/* We do all our own buffering by hand for efficiency. */
147static char *buffer; /* The buffer itself, grown as needed. */
148static bufbytes; /* Number of bytes in the buffer. */
149static size_t bufalloc; /* Number of bytes allocated to the buffer. */
150static bufprev; /* Number of bytes that have been forgotten.
151 This is used to get byte offsets from the
152 beginning of the file. */
153static bufread; /* Number of bytes to get with each read(). */
154
155static void
156initialize_buffer()
157{
158 bufread = 8192;
159 bufalloc = bufread + bufread / 2;
160 buffer = malloc(bufalloc);
161 if (! buffer)
162 {
163 fprintf(stderr, "%s: Memory exhausted (%s)\n", prog,
164 sys_errlist[errno]);
165 exit(ERROR);
166 }
167}
168
169/* The current input file. */
170static fd;
171static char *filename;
172static eof;
173
174/* Fill the buffer retaining the last n bytes at the beginning of the
175 newly filled buffer (for backward context). Returns the number of new
176 bytes read from disk. */
177static
178fill_buffer_retaining(n)
179 int n;
180{
181 char *p, *q;
182 int i;
183
184 /* See if we need to grow the buffer. */
185 if (bufalloc - n <= bufread)
186 {
187 while (bufalloc - n <= bufread)
188 {
189 bufalloc *= 2;
190 bufread *= 2;
191 }
192 buffer = realloc(buffer, bufalloc);
193 if (! buffer)
194 {
195 fprintf(stderr, "%s: Memory exhausted (%s)\n", prog,
196 sys_errlist[errno]);
197 exit(ERROR);
198 }
199 }
200
201 bufprev += bufbytes - n;
202
203 /* Shift stuff down. */
204 for (i = n, p = buffer, q = p + bufbytes - n; i--; )
205 *p++ = *q++;
206 bufbytes = n;
207
208 if (eof)
209 return 0;
210
211 /* Read in new stuff. */
212 i = read(fd, buffer + bufbytes, bufread);
213 if (i < 0)
214 {
215 fprintf(stderr, "%s: read on %s failed (%s)\n", prog,
216 filename ? filename : "<stdin>", sys_errlist[errno]);
217 error = 1;
218 }
219
220 /* Kludge to pretend every nonempty file ends with a newline. */
221 if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
222 {
223 eof = i = 1;
224 buffer[bufbytes] = '\n';
225 }
226
227 bufbytes += i;
228 return i;
229}
230\f
231/* Various flags set according to the argument switches. */
232static trailing_context; /* Lines of context to show after matches. */
233static leading_context; /* Lines of context to show before matches. */
234static byte_count; /* Precede output lines the byte count of the
235 first character on the line. */
236static no_filenames; /* Do not display filenames. */
237static line_numbers; /* Precede output lines with line numbers. */
238static silent; /* Produce no output at all. This switch
239 is bogus, ever hear of /dev/null? */
240static nonmatching_lines; /* Print lines that don't match the regexp. */
241
242static bmgexec; /* Invoke Boyer-Moore-Gosper routines */
243
244/* The compiled regular expression lives here. */
245static struct regexp reg;
246
247/* The compiled regular expression for the backtracking matcher lives here. */
248static struct re_pattern_buffer regex;
249
250/* Pointer in the buffer after the last character printed. */
251static char *printed_limit;
252
253/* True when printed_limit has been artifically advanced without printing
254 anything. */
255static int printed_limit_fake;
256
257/* Print a line at the given line number, returning the number of
258 characters actually printed. Matching is true if the line is to
259 be considered a "matching line". This is only meaningful if
260 surrounding context is turned on. */
261static
262print_line(p, number, matching)
263 char *p;
264 int number;
265 int matching;
266{
267 int count = 0;
268
269 if (silent)
270 {
271 do
272 ++count;
273 while (*p++ != '\n');
274 printed_limit_fake = 0;
275 printed_limit = p;
276 return count;
277 }
278
279 if (filename && !no_filenames)
280 printf("%s%c", filename, matching ? ':' : '-');
281 if (byte_count)
282 printf("%d%c", p - buffer + bufprev, matching ? ':' : '-');
283 if (line_numbers)
284 printf("%d%c", number, matching ? ':' : '-');
285 do
286 {
287 ++count;
288 putchar(*p);
289 }
290 while (*p++ != '\n');
291 printed_limit_fake = 0;
292 printed_limit = p;
293 return count;
294}
295
296/* Print matching or nonmatching lines from the current file. Returns a
297 count of matching or nonmatching lines. */
298static
299grep()
300{
301 int retain = 0; /* Number of bytes to retain on next call
302 to fill_buffer_retaining(). */
303 char *search_limit; /* Pointer to the character after the last
304 newline in the buffer. */
305 char saved_char; /* Character after the last newline. */
306 char *resume; /* Pointer to where to resume search. */
307 int resume_index = 0; /* Count of characters to ignore after
308 refilling the buffer. */
309 int line_count = 1; /* Line number. */
310 int try_backref; /* Set to true if we need to verify the
311 match with a backtracking matcher. */
312 int initial_line_count; /* Line count at beginning of last search. */
313 char *match; /* Pointer to the first character after the
314 string matching the regexp. */
315 int match_count = 0; /* Count of matching lines. */
316 char *matching_line; /* Pointer to first character of the matching
317 line, or of the first line of context to
318 print if context is turned on. */
319 char *real_matching_line; /* Pointer to the first character of the
320 real matching line. */
321 char *next_line; /* Pointer to first character of the line
322 following the matching line. */
323 int pending_lines = 0; /* Lines of context left over from last match
324 that we have to print. */
325 static first_match = 1; /* True when nothing has been printed. */
326 int i;
327 char *tmp;
328 char *execute();
329
330 printed_limit_fake = 0;
331
332 while (fill_buffer_retaining(retain) > 0)
333 {
334 /* Find the last newline in the buffer. */
335 search_limit = buffer + bufbytes;
336 while (search_limit > buffer && search_limit[-1] != '\n')
337 --search_limit;
338 if (search_limit == buffer)
339 {
340 retain = bufbytes;
341 continue;
342 }
343
344 /* Save the character after the last newline so regexecute can write
345 its own sentinel newline. */
346 saved_char = *search_limit;
347
348 /* Search the buffer for a match. */
349 printed_limit = buffer;
350 resume = buffer + resume_index;
351 initial_line_count = line_count;
352
353 while (match = execute(&reg, resume, search_limit, 0, &line_count, &try_backref))
354 {
355 ++match_count;
356
357 /* Find the beginning of the matching line. */
358 matching_line = match;
359 while (matching_line > resume && matching_line[-1] != '\n')
360 --matching_line;
361 real_matching_line = matching_line;
362
363 /* Find the beginning of the next line. */
364 next_line = match;
365 while (next_line < search_limit && *next_line++ != '\n')
366 ;
367
368 /* If a potential backreference is indicated, try it out with
369 a backtracking matcher to make sure the line is a match. */
370 if (try_backref && re_search(&regex, matching_line,
371 next_line - matching_line - 1,
372 0,
373 next_line - matching_line - 1,
374 NULL) < 0)
375 {
376 resume = next_line;
377 if (resume == search_limit)
378 break;
379 else
380 continue;
381 }
382
383 /* Print leftover lines from last time. If nonmatching_lines is
384 turned on, print these as if they were matching lines. */
385 while (resume < matching_line && pending_lines)
386 {
387 resume += print_line(resume, initial_line_count++,
388 nonmatching_lines);
389 --pending_lines;
390 }
391
392 /* Print out the matching or nonmatching lines as necessary. */
393 if (! nonmatching_lines)
394 {
395 /* Back up over leading context if necessary. */
396 for (i = leading_context; matching_line > printed_limit
397 && i; --i)
398 {
399 while (matching_line > printed_limit
400 && (--matching_line)[-1] != '\n')
401 ;
402 --line_count;
403 }
404
405 /* If context is enabled, we may have to print a separator. */
406 if ((leading_context || trailing_context) && !silent
407 && !first_match && (printed_limit_fake || matching_line
408 > printed_limit))
409 printf("----------\n");
410 first_match = 0;
411
412 /* Print the matching line and its leading context. */
413 while (matching_line < real_matching_line)
414 matching_line += print_line(matching_line, line_count++, 0);
415 matching_line += print_line(matching_line, line_count++, 1);
416
417 /* If there's trailing context, leave some lines pending until
418 next time. */
419 pending_lines = trailing_context;
420 }
421 else if (matching_line > resume)
422 {
423 char *real_resume = resume;
424
425 /* Back up over leading context if necessary. */
426 for (i = leading_context; resume > printed_limit && i; --i)
427 {
428 while (resume > printed_limit && (--resume)[-1] != '\n')
429 ;
430 --initial_line_count;
431 }
432
433 /* If context is enabled, we may have to print a separator. */
434 if ((leading_context || trailing_context) && !silent
435 && !first_match && (printed_limit_fake || resume
436 > printed_limit))
437 printf("----------\n");
438 first_match = 0;
439
440 /* Print out the presumably matching leading context. */
441 while (resume < real_resume)
442 resume += print_line(resume, initial_line_count++, 0);
443
444 /* Print out the nonmatching lines prior to the matching line. */
445 while (resume < matching_line)
446 resume += print_line(resume, initial_line_count++, 1);
447
448 /* Deal with trailing context. */
449 if (trailing_context)
450 {
451 print_line(matching_line, line_count, 0);
452 pending_lines = trailing_context - 1;
453 }
454
455 /* Count the current line. */
456 ++line_count;
457 }
458 else
459 {
460 /* The line immediately after a matching line has to be printed
461 because it was pending. */
462 if (pending_lines > 0)
463 {
464 --pending_lines;
465 print_line(matching_line, line_count, 0);
466 }
467 ++line_count;
468 }
469
470 /* Resume searching at the beginning of the next line. */
471 initial_line_count = line_count;
472 resume = next_line;
473
474 if (resume == search_limit)
475 break;
476 }
477
478 /* Restore the saved character. */
479 *search_limit = saved_char;
480
481 if (! nonmatching_lines)
482 {
483 while (resume < search_limit && pending_lines)
484 {
485 resume += print_line(resume, initial_line_count++, 0);
486 --pending_lines;
487 }
488 }
489 else if (search_limit > resume)
490 {
491 char *initial_resume = resume;
492
493 /* Back up over leading context if necessary. */
494 for (i = leading_context; resume > printed_limit && i; --i)
495 {
496 while (resume > printed_limit && (--resume)[-1] != '\n')
497 ;
498 --initial_line_count;
499 }
500
501 /* If context is enabled, we may have to print a separator. */
502 if ((leading_context || trailing_context) && !silent
503 && !first_match && (printed_limit_fake || resume
504 > printed_limit))
505 printf("----------\n");
506 first_match = 0;
507
508 /* Print out all the nonmatching lines up to the search limit. */
509 while (resume < initial_resume)
510 resume += print_line(resume, initial_line_count++, 0);
511 while (resume < search_limit)
512 resume += print_line(resume, initial_line_count++, 1);
513
514 pending_lines = trailing_context;
515 resume_index = 0;
516 retain = bufbytes - (search_limit - buffer);
517 continue;
518 }
519
520 /* Save the trailing end of the buffer for possible use as leading
521 context in the future. */
522 i = leading_context;
523 tmp = search_limit;
524 while (tmp > printed_limit && i--)
525 while (tmp > printed_limit && (--tmp)[-1] != '\n')
526 ;
527 resume_index = search_limit - tmp;
528 retain = bufbytes - (tmp - buffer);
529 if (tmp > printed_limit)
530 printed_limit_fake = 1;
531 }
532
533 return nonmatching_lines ? (line_count - 1) - match_count : match_count;
534}
535\f
536void
537usage_and_die()
538{
539 fprintf(stderr,
540"usage: %s [-CVbchilnsvwx] [-<num>] [-AB <num>] [-f file] [-e] expr [files]\n",
541 prog);
542 exit(ERROR);
543}
544
545static char version[] = "GNU e?grep, version 1.5";
546
547main(argc, argv)
548 int argc;
549 char **argv;
550{
551 int c;
552 int ignore_case = 0; /* Compile the regexp to ignore case. */
553 char *the_regexp = 0; /* The regular expression. */
554 int regexp_len; /* Length of the regular expression. */
555 char *regexp_file = 0; /* File containing parallel regexps. */
556 int count_lines = 0; /* Display only a count of matching lines. */
557 int list_files = 0; /* Display only the names of matching files. */
558 int whole_word = 0; /* Insist that the regexp match a word only. */
559 int whole_line = 0; /* Insist on matching only whole lines. */
560 int line_count = 0; /* Count of matching lines for a file. */
561 int matches_found = 0; /* True if matches were found. */
562 char *regex_errmesg; /* Error message from regex routines. */
563 char translate[_NOTCHAR]; /* Translate table for case conversion
564 (needed by the backtracking matcher). */
565
566 if (prog = strrchr(argv[0], '/'))
567 ++prog;
568 else
569 prog = argv[0];
570
571 opterr = 0;
572 while ((c = getopt(argc, argv, "0123456789A:B:CVbce:f:hilnsvwx")) != EOF)
573 switch (c)
574 {
575 case '?':
576 usage_and_die();
577 break;
578
579 case '0':
580 case '1':
581 case '2':
582 case '3':
583 case '4':
584 case '5':
585 case '6':
586 case '7':
587 case '8':
588 case '9':
589 trailing_context = 10 * trailing_context + c - '0';
590 leading_context = 10 * leading_context + c - '0';
591 break;
592
593 case 'A':
594 if (! sscanf(optarg, "%d", &trailing_context)
595 || trailing_context < 0)
596 usage_and_die();
597 break;
598
599 case 'B':
600 if (! sscanf(optarg, "%d", &leading_context)
601 || leading_context < 0)
602 usage_and_die();
603 break;
604
605 case 'C':
606 trailing_context = leading_context = 2;
607 break;
608
609 case 'V':
610 fprintf(stderr, "%s\n", version);
611 break;
612
613 case 'b':
614 byte_count = 1;
615 break;
616
617 case 'c':
618 count_lines = 1;
619 silent = 1;
620 break;
621
622 case 'e':
623 /* It doesn't make sense to mix -f and -e. */
624 if (regexp_file)
625 usage_and_die();
626 the_regexp = optarg;
627 break;
628
629 case 'f':
630 /* It doesn't make sense to mix -f and -e. */
631 if (the_regexp)
632 usage_and_die();
633 regexp_file = optarg;
634 break;
635
636 case 'h':
637 no_filenames = 1;
638 break;
639
640 case 'i':
641 ignore_case = 1;
642 for (c = 0; c < _NOTCHAR; ++c)
643 if (isupper(c))
644 translate[c] = tolower(c);
645 else
646 translate[c] = c;
647 regex.translate = translate;
648 break;
649
650 case 'l':
651 list_files = 1;
652 silent = 1;
653 break;
654
655 case 'n':
656 line_numbers = 1;
657 break;
658
659 case 's':
660 silent = 1;
661 break;
662
663 case 'v':
664 nonmatching_lines = 1;
665 break;
666
667 case 'w':
668 whole_word = 1;
669 break;
670
671 case 'x':
672 whole_line = 1;
673 break;
674
675 default:
676 /* This can't happen. */
677 fprintf(stderr, "%s: getopt(3) let one by!\n", prog);
678 usage_and_die();
679 break;
680 }
681
682 /* Set the syntax depending on whether we are EGREP or not. */
683#ifdef EGREP
684 regsyntax(RE_SYNTAX_EGREP, ignore_case);
685 re_set_syntax(RE_SYNTAX_EGREP);
686#else
687 regsyntax(RE_SYNTAX_GREP, ignore_case);
688 re_set_syntax(RE_SYNTAX_GREP);
689#endif
690
691 /* Compile the regexp according to all the options. */
692 if (regexp_file)
693 {
694 FILE *fp = fopen(regexp_file, "r");
695 int len = 256;
696 int i = 0;
697
698 if (! fp)
699 {
700 fprintf(stderr, "%s: %s: %s\n", prog, regexp_file,
701 sys_errlist[errno]);
702 exit(ERROR);
703 }
704
705 the_regexp = malloc(len);
706 while ((c = getc(fp)) != EOF)
707 {
708 the_regexp[i++] = c;
709 if (i == len)
710 the_regexp = realloc(the_regexp, len *= 2);
711 }
712 fclose(fp);
713 /* Nuke the concluding newline so we won't match the empty string. */
714 if (i > 0 && the_regexp[i - 1] == '\n')
715 --i;
716 regexp_len = i;
717 }
718 else if (! the_regexp)
719 {
720 if (optind >= argc)
721 usage_and_die();
722 the_regexp = argv[optind++];
723 regexp_len = strlen(the_regexp);
724 }
725 else
726 regexp_len = strlen(the_regexp);
727
728 if (whole_word || whole_line)
729 {
730 char *n = malloc(regexp_len + 8);
731 int i = 0;
732
733 if (whole_line)
734 n[i++] = '^';
735 else
736 n[i++] = '\\', n[i++] = '<';
737#ifndef EGREP
738 n[i++] = '\\';
739#endif
740 n[i++] = '(';
741 memcpy(n + i, the_regexp, regexp_len);
742 i += regexp_len;
743#ifndef EGREP
744 n[i++] = '\\';
745#endif
746 n[i++] = ')';
747 if (whole_line)
748 n[i++] = '$';
749 else
750 n[i++] = '\\', n[i++] = '>';
751 the_regexp = n;
752 regexp_len = i;
753 }
754
755 regcompile(the_regexp, regexp_len, &reg, 1);
756
757 if (regex_errmesg = re_compile_pattern(the_regexp, regexp_len, &regex))
758 regerror(regex_errmesg);
759
760 /*
761 Find the longest metacharacter-free string which must occur in the
762 regexpr, before short-circuiting regexecute() with Boyer-Moore-Gosper.
763 (Conjecture: The problem in general is NP-complete.) If there is no
764 such string (like for many alternations), then default to full automaton
765 search. regmust() code and heuristics [see dfa.c] courtesy
766 Arthur David Olson.
767 */
768 if (line_numbers == 0 && nonmatching_lines == 0)
769 {
770 if (reg.mustn == 0 || reg.mustn == MUST_MAX ||
771 strchr(reg.must, '\0') != reg.must + reg.mustn)
772 bmgexec = 0;
773 else
774 {
775 reg.must[reg.mustn] = '\0';
776 if (getenv("MUSTDEBUG") != NULL)
777 (void) printf("must have: \"%s\"\n", reg.must);
778 bmg_setup(reg.must, ignore_case);
779 bmgexec = 1;
780 }
781 }
782
783 if (argc - optind < 2)
784 no_filenames = 1;
785
786 initialize_buffer();
787
788 if (argc > optind)
789 while (optind < argc)
790 {
791 bufprev = eof = 0;
792 filename = argv[optind++];
793 fd = open(filename, 0, 0);
794 if (fd < 0)
795 {
796 fprintf(stderr, "%s: %s: %s\n", prog, filename,
797 sys_errlist[errno]);
798 error = 1;
799 continue;
800 }
801 if (line_count = grep())
802 matches_found = 1;
803 close(fd);
804 if (count_lines)
805 if (!no_filenames)
806 printf("%s:%d\n", filename, line_count);
807 else
808 printf("%d\n", line_count);
809 else if (list_files && line_count)
810 printf("%s\n", filename);
811 }
812 else
813 {
814 if (line_count = grep())
815 matches_found = 1;
816 if (count_lines)
817 printf("%d\n", line_count);
818 else if (list_files && line_count)
819 printf("<stdin>\n");
820 }
821
822 if (error)
823 exit(ERROR);
824 if (matches_found)
825 exit(MATCHES_FOUND);
826 exit(NO_MATCHES_FOUND);
827}
828
829/* Needed by the regexp routines. This could be fancier, especially when
830 dealing with parallel regexps in files. */
831void
832regerror(s)
833 const char *s;
834{
835 fprintf(stderr, "%s: %s\n", prog, s);
836 exit(ERROR);
837}
838
839/*
840 bmg_setup() and bmg_search() adapted from:
841 Boyer/Moore/Gosper-assisted 'egrep' search, with delta0 table as in
842 original paper (CACM, October, 1977). No delta1 or delta2. According to
843 experiment (Horspool, Soft. Prac. Exp., 1982), delta2 is of minimal
844 practical value. However, to improve for worst case input, integrating
845 the improved Galil strategies (Apostolico/Giancarlo, Siam. J. Comput.,
846 February 1986) deserves consideration.
847
848 James A. Woods Copyleft (C) 1986, 1988
849 NASA Ames Research Center
850*/
851
852char *
853execute(r, begin, end, newline, count, try_backref)
854 struct regexp *r;
855 char *begin;
856 char *end;
857 int newline;
858 int *count;
859 int *try_backref;
860{
861 register char *p, *s;
862 char *match;
863 char *start = begin;
864 char save; /* regexecute() sentinel */
865 int len;
866 char *bmg_search();
867
868 if (!bmgexec) /* full automaton search */
869 return(regexecute(r, begin, end, newline, count, try_backref));
870 else
871 {
872 len = end - begin;
873 while ((match = bmg_search((unsigned char *) start, len)) != NULL)
874 {
875 p = match; /* narrow search range to submatch line */
876 while (p > begin && *p != '\n')
877 p--;
878 s = match;
879 while (s < end && *s != '\n')
880 s++;
881 s++;
882
883 save = *s;
884 *s = '\0';
885 match = regexecute(r, p, s, newline, count, try_backref);
886 *s = save;
887
888 if (match != NULL)
889 return((char *) match);
890 else
891 {
892 start = s;
893 len = end - start;
894 }
895 }
896 return(NULL);
897 }
898}
899
900#include <ctype.h>
901int delta0[256];
902unsigned char cmap[256]; /* (un)folded characters */
903unsigned char pattern[5000];
904int patlen;
905
906char *
907bmg_search(buffer, buflen)
908 unsigned char *buffer;
909 int buflen;
910{
911 register unsigned char *k, *strend, *s, *buflim;
912 register int t;
913 int j;
914
915 if (patlen > buflen)
916 return NULL;
917
918 buflim = buffer + buflen;
919 if (buflen > patlen * 4)
920 strend = buflim - patlen * 4;
921 else
922 strend = buffer;
923
924 s = buffer;
925 k = buffer + patlen - 1;
926
927 for (;;)
928 {
929 /* The dreaded inner loop, revisited. */
930 while (k < strend && (t = delta0[*k]))
931 {
932 k += t;
933 k += delta0[*k];
934 k += delta0[*k];
935 }
936 while (k < buflim && delta0[*k])
937 ++k;
938 if (k == buflim)
939 break;
940
941 j = patlen - 1;
942 s = k;
943 while (--j >= 0 && cmap[*--s] == pattern[j])
944 ;
945 /*
946 delta-less shortcut for literati, but
947 short shrift for genetic engineers.
948 */
949 if (j >= 0)
950 k++;
951 else /* submatch */
952 return ((char *)k);
953 }
954 return(NULL);
955}
956
957bmg_setup(pat, folded) /* compute "boyer-moore" delta table */
958 char *pat;
959 int folded;
960{ /* ... HAKMEM lives ... */
961 int j;
962
963 patlen = strlen(pat);
964
965 if (folded) /* fold case while saving pattern */
966 for (j = 0; j < patlen; j++)
967 pattern[j] = (isupper((int) pat[j]) ?
968 (char) tolower((int) pat[j]) : pat[j]);
969 else
970 memcpy(pattern, pat, patlen);
971
972 for (j = 0; j < 256; j++)
973 {
974 delta0[j] = patlen;
975 cmap[j] = (char) j; /* could be done at compile time */
976 }
977 for (j = 0; j < patlen - 1; j++)
978 delta0[pattern[j]] = patlen - j - 1;
979 delta0[pattern[patlen - 1]] = 0;
980
981 if (folded)
982 {
983 for (j = 0; j < patlen - 1; j++)
984 if (islower((int) pattern[j]))
985 delta0[toupper((int) pattern[j])] = patlen - j - 1;
986 if (islower((int) pattern[patlen - 1]))
987 delta0[toupper((int) pattern[patlen - 1])] = 0;
988 for (j = 'A'; j <= 'Z'; j++)
989 cmap[j] = (char) tolower((int) j);
990 }
991}
992
993#ifdef nope
994#ifndef USG
995
996/* (groan) compatibility */
997
998char *
999strchr(s, c)
1000 char *s;
1001{
1002 return index(s, c);
1003}
1004
1005char *
1006strrchr(s, c)
1007 char *s;
1008{
1009 return rindex(s, c);
1010}
1011
1012char *
1013memcpy(d, s, n)
1014 char *d, *s;
1015{
1016 return bcopy(s, d, n);
1017}
1018
1019#else
1020
1021char *
1022index(s, c)
1023 char *s;
1024{
1025 return strchr(s, c);
1026}
1027
1028char *
1029bcopy(s, d, n)
1030 char *s, *d;
1031{
1032 return memcpy(d, s, n);
1033}
1034
1035char *
1036bzero(s, n)
1037 char *s;
1038{
1039 return memset(s, 0, n);
1040}
1041
1042bcmp(s, t, n)
1043 char *s, *t;
1044{
1045 return memcmp(s, t, n);
1046}
1047
1048#endif
1049#endif