Commit | Line | Data |
---|---|---|
fcbc81c8 WJ |
1 | /* fgrep.c - grep program built around matcher. |
2 | Copyright 1989 Free Software Foundation | |
3 | Written August 1989 by Mike Haertel. | |
4 | ||
5 | This program is free software; you can redistribute it and/or modify | |
6 | it under the terms of the GNU General Public License as published by | |
7 | the Free Software Foundation; either version 1, or (at your option) | |
8 | any later version. | |
9 | ||
10 | This program is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | GNU General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU General Public License | |
16 | along with this program; if not, write to the Free Software | |
17 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
18 | ||
19 | The author may be reached (Email) at the address mike@ai.mit.edu, | |
20 | or (US mail) as Mike Haertel c/o Free Software Foundation. */ | |
21 | ||
22 | #include "std.h" | |
23 | #include "unix.h" | |
24 | ||
25 | #include <errno.h> | |
26 | #include <stdio.h> | |
27 | #include <string.h> | |
28 | ||
29 | #include "kwset.h" | |
30 | ||
/* Number of distinct values an unsigned char can take. */
#define NCHAR (UCHAR_MAX + 1)

/* Diagnostic state. */
static const char *prog;        /* Program name prefixed to every message. */
static int error_seen;          /* Nonzero once an error has been reported. */

/* Output style, chosen on the command line. */
static int out_silent;          /* Suppress all normal output. */
static int out_invert;          /* Print the lines that do NOT match. */
static int out_file;            /* Label output with the file name. */
static int out_line;            /* Label output with the line number. */
static int out_byte;            /* Label output with the byte offset. */
static int out_before;          /* Number of leading context lines. */
static int out_after;           /* Number of trailing context lines. */
45 | ||
46 | /* Print MESG and possibly the error string for ERRNUM. Remember | |
47 | that something awful happened. */ | |
48 | static void | |
49 | DEFUN(error, (mesg, errnum), const char *mesg AND int errnum) | |
50 | { | |
51 | if (errnum) | |
52 | fprintf(stderr, "%s: %s: %s\n", prog, mesg, strerror(errnum)); | |
53 | else | |
54 | fprintf(stderr, "%s: %s\n", prog, mesg); | |
55 | error_seen = 1; | |
56 | } | |
57 | ||
58 | /* Like error(), but die horribly after printing. */ | |
59 | static void | |
60 | DEFUN(fatal, (mesg, errnum), const char *mesg AND int errnum) | |
61 | { | |
62 | error(mesg, errnum); | |
63 | exit(2); | |
64 | } | |
65 | ||
66 | /* Interface to handle errors and fix library lossage. */ | |
67 | static PTR | |
68 | DEFUN(xmalloc, (size), size_t size) | |
69 | { | |
70 | PTR result; | |
71 | ||
72 | result = malloc(size); | |
73 | if (size && !result) | |
74 | fatal("memory exhausted", 0); | |
75 | return result; | |
76 | } | |
77 | ||
78 | /* Interface to handle errors and fix some library lossage. */ | |
79 | static PTR | |
80 | DEFUN(xrealloc, (ptr, size), PTR ptr AND size_t size) | |
81 | { | |
82 | PTR result; | |
83 | ||
84 | if (ptr) | |
85 | result = realloc(ptr, size); | |
86 | else | |
87 | result = malloc(size); | |
88 | if (size && !result) | |
89 | fatal("memory exhausted", 0); | |
90 | return result; | |
91 | } | |
92 | ||
93 | /* Compiled search pattern. */ | |
94 | kwset_t kwset; | |
95 | ||
96 | /* Flags controlling how pattern matching is performed. */ | |
97 | static int match_fold; /* Fold all letters to one case. */ | |
98 | static int match_words; /* Match only whole words. */ | |
99 | static int match_lines; /* Match only whole lines. */ | |
100 | ||
101 | static void | |
102 | DEFUN(compile, (pattern, size), const char *pattern AND size_t size) | |
103 | { | |
104 | const char *beg, *lim, *err; | |
105 | static char trans[NCHAR]; | |
106 | int i; | |
107 | ||
108 | if (match_fold) | |
109 | for (i = 0; i < NCHAR; ++i) | |
110 | trans[i] = TOLOWER(i); | |
111 | ||
112 | if (!(kwset = kwsalloc(match_fold ? trans : (const char *) NULL))) | |
113 | fatal("memory exhausted", 0); | |
114 | ||
115 | beg = pattern; | |
116 | do | |
117 | { | |
118 | for (lim = beg; lim < pattern + size && *lim != '\n'; ++lim) | |
119 | ; | |
120 | if (err = kwsincr(kwset, beg, lim - beg)) | |
121 | fatal(err, 0); | |
122 | if (lim < pattern + size) | |
123 | ++lim; | |
124 | beg = lim; | |
125 | } | |
126 | while (beg < pattern + size); | |
127 | ||
128 | if (err = kwsprep(kwset)) | |
129 | fatal(err, 0); | |
130 | } | |
131 | ||
132 | static char * | |
133 | DEFUN(execute, (buf, size), char *buf AND size_t size) | |
134 | { | |
135 | register char *beg, *try; | |
136 | register size_t len; | |
137 | struct kwsmatch kwsmatch; | |
138 | ||
139 | beg = buf; | |
140 | for (;beg <= buf + size; ++beg) | |
141 | { | |
142 | if (!(beg = kwsexec(kwset, beg, buf + size - beg, &kwsmatch))) | |
143 | return NULL;; | |
144 | len = kwsmatch.size[0]; | |
145 | if (match_lines) | |
146 | { | |
147 | if (beg > buf && beg[-1] != '\n') | |
148 | continue; | |
149 | if (beg + len < buf + size && *(beg + len) != '\n') | |
150 | continue; | |
151 | return beg; | |
152 | } | |
153 | else if (match_words) | |
154 | for (try = beg; len && try;) | |
155 | { | |
156 | if (try > buf && (ISALNUM((unsigned char) try[-1]) | |
157 | || !ISALNUM((unsigned char) *try))) | |
158 | goto retry; | |
159 | if (try + len < buf + size | |
160 | && (ISALNUM((unsigned char) *(try + len)) | |
161 | || !ISALNUM((unsigned char) (try + len)[-1]))) | |
162 | goto retry; | |
163 | return try; | |
164 | retry: | |
165 | if (--len) | |
166 | try = kwsexec(kwset, beg, len, &kwsmatch); | |
167 | else | |
168 | break; | |
169 | len = kwsmatch.size[0]; | |
170 | } | |
171 | else | |
172 | return beg; | |
173 | } | |
174 | ||
175 | return NULL; | |
176 | } | |
177 | ||
/* State of the input buffer.  One buffer serves every option: it holds
   the region currently being scanned plus whatever earlier text must be
   kept around for leading context. */
static char *bufbeg;            /* Start of the user-visible region. */
static char *buflim;            /* End (limit) of the user-visible region. */
static char *buf;               /* Base address of the buffer. */
static size_t bufalloc;         /* Bytes allocated for the buffer. */
static size_t bufcc;            /* Bytes currently held in the buffer. */
static unsigned long int buftotalcc;
                                /* Bytes discarded from the front of the
                                   buffer since the last reset. */
static char *buflast;           /* Just past the last character printed. */
static int bufgap;              /* Set when buflast no longer marks the
                                   true end of the printed text. */
static unsigned long int buftotalnl;
                                /* Newlines counted before buflast. */
static int bufpending;          /* Trailing-context lines still owed,
                                   starting at buflast. */
static int bufdesc;             /* Descriptor the buffer is filled from. */
static int bufeof;              /* Nonzero once end of file was reached. */
static const char *buffile;     /* File name used in messages. */
194 | ||
195 | /* Scan and count the newlines prior to LIM in the buffer. */ | |
196 | static void | |
197 | DEFUN(nlscan, (lim), register char *lim) | |
198 | { | |
199 | register char *p; | |
200 | ||
201 | for (p = buflast; p < lim; ++p) | |
202 | if (*p == '\n') | |
203 | ++buftotalnl; | |
204 | buflast = lim; | |
205 | } | |
206 | ||
207 | /* Print the line beginning at BEG, using SEP to separate optional label | |
208 | fields from the text of the line. Return the size of the line. */ | |
209 | static size_t | |
210 | DEFUN(prline, (beg, sep), register char *beg AND register char sep) | |
211 | { | |
212 | register size_t cc; | |
213 | register char c; | |
214 | static int err; | |
215 | ||
216 | cc = 0; | |
217 | ||
218 | if (out_silent || err) | |
219 | while (beg < buflim) | |
220 | { | |
221 | ++cc; | |
222 | if (*beg++ == '\n') | |
223 | break; | |
224 | } | |
225 | else | |
226 | { | |
227 | if (out_file) | |
228 | printf("%s%c", buffile, sep); | |
229 | if (out_line) | |
230 | { | |
231 | nlscan(beg); | |
232 | printf("%d%c", buftotalnl + 1, sep); | |
233 | } | |
234 | if (out_byte) | |
235 | printf("%lu%c", buftotalcc + (beg - buf), sep); | |
236 | while (beg < buflim) | |
237 | { | |
238 | ++cc; | |
239 | c = *beg++; | |
240 | putchar(c); | |
241 | if (c == '\n') | |
242 | break; | |
243 | } | |
244 | if (ferror(stdout)) | |
245 | { | |
246 | error("output error", errno); | |
247 | err = 1; | |
248 | } | |
249 | } | |
250 | ||
251 | if (out_line) | |
252 | nlscan(beg); | |
253 | else | |
254 | buflast = beg; | |
255 | bufgap = 0; | |
256 | ||
257 | return cc; | |
258 | } | |
259 | ||
260 | /* Print pending bytes of last trailing context prior to LIM. */ | |
261 | static void | |
262 | DEFUN(prpending, (lim), register char *lim) | |
263 | { | |
264 | while (buflast < lim && bufpending) | |
265 | { | |
266 | --bufpending; | |
267 | prline(buflast, '-'); | |
268 | } | |
269 | } | |
270 | ||
271 | /* Print the lines between BEG and LIM. Deal with context crap. | |
272 | Return the count of lines between BEG and LIM. */ | |
273 | static int | |
274 | DEFUN(prtext, (beg, lim), char *beg AND char *lim) | |
275 | { | |
276 | static int used; | |
277 | register char *p; | |
278 | int i, n; | |
279 | ||
280 | prpending(beg); | |
281 | ||
282 | p = beg; | |
283 | for (i = 0; i < out_before; ++i) | |
284 | if (p > buflast) | |
285 | do | |
286 | --p; | |
287 | while (p > buflast && p[-1] != '\n'); | |
288 | ||
289 | if ((out_before || out_after) && used && (p > buflast || bufgap)) | |
290 | puts("--"); | |
291 | ||
292 | while (p < beg) | |
293 | p += prline(p, '-'); | |
294 | ||
295 | n = 0; | |
296 | while (p < lim) | |
297 | { | |
298 | ++n; | |
299 | p += prline(p, ':'); | |
300 | } | |
301 | ||
302 | bufpending = out_after; | |
303 | used = 1; | |
304 | ||
305 | return n; | |
306 | } | |
307 | ||
308 | /* Fill the user-visible portion of the buffer, returning a byte count. */ | |
309 | static int | |
310 | fillbuf() | |
311 | { | |
312 | register char *b, *d, *l; | |
313 | int i, cc; | |
314 | size_t discard, save; | |
315 | ||
316 | prpending(buflim); | |
317 | ||
318 | b = buflim; | |
319 | for (i = 0; i < out_before; ++i) | |
320 | if (b > buflast) | |
321 | do | |
322 | --b; | |
323 | while (b > buflast && b[-1] != '\n'); | |
324 | ||
325 | if (buflast < b) | |
326 | bufgap = 1; | |
327 | if (out_line) | |
328 | nlscan(b); | |
329 | ||
330 | discard = b - buf; | |
331 | save = buflim - b; | |
332 | ||
333 | if (b > buf) | |
334 | { | |
335 | d = buf; | |
336 | l = buf + bufcc; | |
337 | while (b < l) | |
338 | *d++ = *b++; | |
339 | } | |
340 | ||
341 | bufcc -= discard; | |
342 | buftotalcc += discard; | |
343 | ||
344 | do | |
345 | { | |
346 | if (!bufeof) | |
347 | { | |
348 | if (bufcc > bufalloc / 2) | |
349 | buf = xrealloc(buf, bufalloc *= 2); | |
350 | cc = read(bufdesc, buf + bufcc, bufalloc - bufcc); | |
351 | if (cc < 0) | |
352 | { | |
353 | error(buffile, errno); | |
354 | bufeof = 1; | |
355 | } | |
356 | else | |
357 | { | |
358 | bufeof = !cc; | |
359 | bufcc += cc; | |
360 | } | |
361 | } | |
362 | bufbeg = buf + save; | |
363 | for (l = buf + bufcc; l > bufbeg && l[-1] != '\n'; --l) | |
364 | ; | |
365 | buflim = l; | |
366 | buflast = buf; | |
367 | } | |
368 | while (!bufeof && bufbeg == buflim); | |
369 | ||
370 | if (bufeof) | |
371 | buflim = buf + bufcc; | |
372 | ||
373 | return buflim - bufbeg; | |
374 | } | |
375 | ||
376 | /* One-time initialization. */ | |
377 | static void | |
378 | initbuf() | |
379 | { | |
380 | bufalloc = 8192; | |
381 | buf = xmalloc(bufalloc); | |
382 | } | |
383 | ||
384 | /* Reset the buffer for a new file. */ | |
385 | static void | |
386 | DEFUN(resetbuf, (desc, file), int desc AND const char *file) | |
387 | { | |
388 | bufbeg = buf; | |
389 | buflim = buf; | |
390 | bufcc = 0; | |
391 | buftotalcc = 0; | |
392 | buflast = buf; | |
393 | bufgap = 0; | |
394 | buftotalnl = 0; | |
395 | bufpending = 0; | |
396 | bufdesc = desc; | |
397 | bufeof = 0; | |
398 | buffile = file; | |
399 | } | |
400 | ||
401 | /* Scan the user-visible portion of the buffer, calling prtext() for | |
402 | matching lines (or between matching lines if OUT_INVERT is true). | |
403 | Return a count of lines printed. */ | |
404 | static int | |
405 | grepbuf() | |
406 | { | |
407 | int total; | |
408 | register char *p, *b, *l; | |
409 | ||
410 | total = 0; | |
411 | p = bufbeg; | |
412 | while (b = execute(p, buflim - p)) | |
413 | { | |
414 | if (b == buflim && (b > bufbeg && b[-1] == '\n' || b == bufbeg)) | |
415 | break; | |
416 | while (b > bufbeg && b[-1] != '\n') | |
417 | --b; | |
418 | l = b + 1; | |
419 | while (l < buflim && l[-1] != '\n') | |
420 | ++l; | |
421 | if (!out_invert) | |
422 | total += prtext(b, l); | |
423 | else if (p < b) | |
424 | total += prtext(p, b); | |
425 | p = l; | |
426 | } | |
427 | if (out_invert && p < buflim) | |
428 | total += prtext(p, buflim); | |
429 | return total; | |
430 | } | |
431 | ||
432 | /* Scan the given file, returning a count of lines printed. */ | |
433 | static int | |
434 | DEFUN(grep, (desc, file), int desc AND const char *file) | |
435 | { | |
436 | int total; | |
437 | ||
438 | total = 0; | |
439 | resetbuf(desc, file); | |
440 | while (fillbuf()) | |
441 | total += grepbuf(); | |
442 | return total; | |
443 | } | |
444 | ||
445 | static const char version[] = "GNU fgrep, version 1.1"; | |
446 | ||
447 | #define USAGE \ | |
448 | "usage: %s [-[[AB] ]<num>] [-[CVchilnsvwx]] [-[ef]] <expr> [<files...>]\n" | |
449 | ||
450 | static void | |
451 | usage() | |
452 | { | |
453 | fprintf(stderr, USAGE, prog); | |
454 | exit(2); | |
455 | } | |
456 | ||
457 | int | |
458 | DEFUN(main, (argc, argv), int argc AND char *argv[]) | |
459 | { | |
460 | char *keys; | |
461 | size_t keycc, keyalloc; | |
462 | int count_matches, no_filenames, list_files; | |
463 | int opt, cc, desc, count, status; | |
464 | FILE *fp; | |
465 | ||
466 | prog = argv[0]; | |
467 | if (prog && strrchr(prog, '/')) | |
468 | prog = strrchr(prog, '/') + 1; | |
469 | ||
470 | keys = NULL; | |
471 | count_matches = 0; | |
472 | no_filenames = 0; | |
473 | list_files = 0; | |
474 | ||
475 | while ((opt = getopt(argc, argv, "0123456789A:B:CVbce:f:hilnsvwxy")) != EOF) | |
476 | switch (opt) | |
477 | { | |
478 | case '0': | |
479 | case '1': | |
480 | case '2': | |
481 | case '3': | |
482 | case '4': | |
483 | case '5': | |
484 | case '6': | |
485 | case '7': | |
486 | case '8': | |
487 | case '9': | |
488 | out_before = 10 * out_before + opt - '0'; | |
489 | out_after = 10 * out_after + opt - '0'; | |
490 | break; | |
491 | case 'A': | |
492 | out_after = atoi(optarg); | |
493 | if (out_after < 0) | |
494 | usage(); | |
495 | break; | |
496 | case 'B': | |
497 | out_before = atoi(optarg); | |
498 | if (out_before < 0) | |
499 | usage(); | |
500 | break; | |
501 | case 'C': | |
502 | out_before = out_after = 2; | |
503 | break; | |
504 | case 'V': | |
505 | fprintf(stderr, "%s\n", version); | |
506 | break; | |
507 | case 'b': | |
508 | out_byte = 1; | |
509 | break; | |
510 | case 'c': | |
511 | out_silent = 1; | |
512 | count_matches = 1; | |
513 | break; | |
514 | case 'e': | |
515 | if (keys) | |
516 | usage(); | |
517 | keys = optarg; | |
518 | keycc = strlen(keys); | |
519 | break; | |
520 | case 'f': | |
521 | if (keys) | |
522 | usage(); | |
523 | fp = strcmp(optarg, "-") ? fopen(optarg, "r") : stdin; | |
524 | if (!fp) | |
525 | fatal(optarg, errno); | |
526 | keyalloc = 1024; | |
527 | keys = xmalloc(keyalloc); | |
528 | keycc = 0; | |
529 | while (!feof(fp) | |
530 | && (cc = fread(keys + keycc, 1, keyalloc - keycc, fp)) > 0) | |
531 | { | |
532 | keycc += cc; | |
533 | if (keycc == keyalloc) | |
534 | keys = xrealloc(keys, keyalloc *= 2); | |
535 | } | |
536 | if (fp != stdin) | |
537 | fclose(fp); | |
538 | break; | |
539 | case 'h': | |
540 | no_filenames = 1; | |
541 | break; | |
542 | case 'i': | |
543 | case 'y': /* For old-timers . . . */ | |
544 | match_fold = 1; | |
545 | break; | |
546 | case 'l': | |
547 | out_silent = 1; | |
548 | list_files = 1; | |
549 | break; | |
550 | case 'n': | |
551 | out_line = 1; | |
552 | break; | |
553 | case 's': | |
554 | out_silent = 1; | |
555 | break; | |
556 | case 'v': | |
557 | out_invert = 1; | |
558 | break; | |
559 | case 'w': | |
560 | match_words = 1; | |
561 | break; | |
562 | case 'x': | |
563 | match_lines = 1; | |
564 | break; | |
565 | default: | |
566 | usage(); | |
567 | break; | |
568 | } | |
569 | ||
570 | if (!keys) | |
571 | if (optind < argc) | |
572 | { | |
573 | keys = argv[optind++]; | |
574 | keycc = strlen(keys); | |
575 | } | |
576 | else | |
577 | usage(); | |
578 | ||
579 | compile(keys, keycc); | |
580 | ||
581 | if (argc - optind > 1 && !no_filenames) | |
582 | out_file = 1; | |
583 | ||
584 | status = 1; | |
585 | initbuf(); | |
586 | ||
587 | if (optind < argc) | |
588 | while (optind < argc) | |
589 | { | |
590 | desc = strcmp(argv[optind], "-") ? open(argv[optind], 0) : 0; | |
591 | if (desc < 0) | |
592 | error(argv[optind], errno); | |
593 | else | |
594 | { | |
595 | count = grep(desc, argv[optind]); | |
596 | if (count_matches) | |
597 | { | |
598 | if (out_file) | |
599 | printf("%s:", argv[optind]); | |
600 | printf("%d\n", count); | |
601 | } | |
602 | if (count) | |
603 | { | |
604 | status = 0; | |
605 | if (list_files) | |
606 | printf("%s\n", argv[optind]); | |
607 | } | |
608 | } | |
609 | if (desc) | |
610 | close(desc); | |
611 | ++optind; | |
612 | } | |
613 | else | |
614 | { | |
615 | count = grep(0, "<stdin>"); | |
616 | if (count_matches) | |
617 | printf("%d\n", count); | |
618 | if (count) | |
619 | { | |
620 | status = 0; | |
621 | if (list_files) | |
622 | printf("%s\n", argv[optind]); | |
623 | } | |
624 | } | |
625 | ||
626 | exit(error_seen ? 2 : status); | |
627 | } |