Bell 32V release
[unix-history] / usr / src / cmd / egrep.y
CommitLineData
16edd151
TL
1/*
2 * egrep -- print lines containing (or not containing) a regular expression
3 *
4 * status returns:
5 * 0 - ok, and some matches
6 * 1 - ok, but no matches
7 * 2 - some error
8 */
9%token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST
10%left OR
11%left CHAR DOT CCL NCCL '('
12%left CAT
13%left STAR PLUS QUEST
14
15%{
16#include <stdio.h>
17
18#define MAXLIN 350
19#define MAXPOS 4000
20#define NCHARS 128
21#define NSTATES 128
22#define FINAL -1
23char gotofn[NSTATES][NCHARS];
24int state[NSTATES];
25char out[NSTATES];
26int line = 1;
27int name[MAXLIN];
28int left[MAXLIN];
29int right[MAXLIN];
30int parent[MAXLIN];
31int foll[MAXLIN];
32int positions[MAXPOS];
33char chars[MAXLIN];
34int nxtpos;
35int nxtchar = 0;
36int tmpstat[MAXLIN];
37int initstat[MAXLIN];
38int xstate;
39int count;
40int icount;
41char *input;
42
43long lnum;
44int bflag;
45int cflag;
46int fflag;
47int lflag;
48int nflag;
49int hflag = 1;
50int sflag;
51int vflag;
52int nfile;
53int blkno;
54long tln;
55int nsucc;
56
57int f;
58int fname;
59%}
60
61%%
62s: t
63 ={ unary(FINAL, $1);
64 line--;
65 }
66 ;
67t: b r
68 ={ $$ = node(CAT, $1, $2); }
69 | OR b r OR
70 ={ $$ = node(CAT, $2, $3); }
71 | OR b r
72 ={ $$ = node(CAT, $2, $3); }
73 | b r OR
74 ={ $$ = node(CAT, $1, $2); }
75 ;
76b:
77 ={ $$ = enter(DOT);
78 $$ = unary(STAR, $$); }
79 ;
80r: CHAR
81 ={ $$ = enter($1); }
82 | DOT
83 ={ $$ = enter(DOT); }
84 | CCL
85 ={ $$ = cclenter(CCL); }
86 | NCCL
87 ={ $$ = cclenter(NCCL); }
88 ;
89
90r: r OR r
91 ={ $$ = node(OR, $1, $3); }
92 | r r %prec CAT
93 ={ $$ = node(CAT, $1, $2); }
94 | r STAR
95 ={ $$ = unary(STAR, $1); }
96 | r PLUS
97 ={ $$ = unary(PLUS, $1); }
98 | r QUEST
99 ={ $$ = unary(QUEST, $1); }
100 | '(' r ')'
101 ={ $$ = $2; }
102 | error
103 ;
104
105%%
106yyerror(s) {
107 fprintf(stderr, "egrep: %s\n", s);
108 exit(2);
109}
110
111yylex() {
112 extern int yylval;
113 int cclcnt, x;
114 register char c, d;
115 switch(c = nextch()) {
116 case '$':
117 case '^': c = '\n';
118 goto defchar;
119 case '|': return (OR);
120 case '*': return (STAR);
121 case '+': return (PLUS);
122 case '?': return (QUEST);
123 case '(': return (c);
124 case ')': return (c);
125 case '.': return (DOT);
126 case '\0': return (0);
127 case '\n': return (OR);
128 case '[':
129 x = CCL;
130 cclcnt = 0;
131 count = nxtchar++;
132 if ((c = nextch()) == '^') {
133 x = NCCL;
134 c = nextch();
135 }
136 do {
137 if (c == '\0') synerror();
138 if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) {
139 if ((d = nextch()) != 0) {
140 c = chars[nxtchar-1];
141 while (c < d) {
142 if (nxtchar >= MAXLIN) overflo();
143 chars[nxtchar++] = ++c;
144 cclcnt++;
145 }
146 continue;
147 }
148 }
149 if (nxtchar >= MAXLIN) overflo();
150 chars[nxtchar++] = c;
151 cclcnt++;
152 } while ((c = nextch()) != ']');
153 chars[count] = cclcnt;
154 return (x);
155 case '\\':
156 if ((c = nextch()) == '\0') synerror();
157 defchar:
158 default: yylval = c; return (CHAR);
159 }
160}
161nextch() {
162 register char c;
163 if (fflag) {
164 if ((c = getc(stdin)) == EOF) return(0);
165 }
166 else c = *input++;
167 return(c);
168}
169
170synerror() {
171 fprintf(stderr, "egrep: syntax error\n");
172 exit(2);
173}
174
175enter(x) int x; {
176 if(line >= MAXLIN) overflo();
177 name[line] = x;
178 left[line] = 0;
179 right[line] = 0;
180 return(line++);
181}
182
183cclenter(x) int x; {
184 register linno;
185 linno = enter(x);
186 right[linno] = count;
187 return (linno);
188}
189
190node(x, l, r) {
191 if(line >= MAXLIN) overflo();
192 name[line] = x;
193 left[line] = l;
194 right[line] = r;
195 parent[l] = line;
196 parent[r] = line;
197 return(line++);
198}
199
200unary(x, d) {
201 if(line >= MAXLIN) overflo();
202 name[line] = x;
203 left[line] = d;
204 right[line] = 0;
205 parent[d] = line;
206 return(line++);
207}
208overflo() {
209 fprintf(stderr, "egrep: regular expression too long\n");
210 exit(2);
211}
212
213cfoll(v) {
214 register i;
215 if (left[v] == 0) {
216 count = 0;
217 for (i=1; i<=line; i++) tmpstat[i] = 0;
218 follow(v);
219 add(foll, v);
220 }
221 else if (right[v] == 0) cfoll(left[v]);
222 else {
223 cfoll(left[v]);
224 cfoll(right[v]);
225 }
226}
227cgotofn() {
228 register c, i, k;
229 int n, s;
230 char symbol[NCHARS];
231 int j, nc, pc, pos;
232 int curpos, num;
233 int number, newpos;
234 count = 0;
235 for (n=3; n<=line; n++) tmpstat[n] = 0;
236 if (cstate(line-1)==0) {
237 tmpstat[line] = 1;
238 count++;
239 out[0] = 1;
240 }
241 for (n=3; n<=line; n++) initstat[n] = tmpstat[n];
242 count--; /*leave out position 1 */
243 icount = count;
244 tmpstat[1] = 0;
245 add(state, 0);
246 n = 0;
247 for (s=0; s<=n; s++) {
248 if (out[s] == 1) continue;
249 for (i=0; i<NCHARS; i++) symbol[i] = 0;
250 num = positions[state[s]];
251 count = icount;
252 for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
253 pos = state[s] + 1;
254 for (i=0; i<num; i++) {
255 curpos = positions[pos];
256 if ((c = name[curpos]) >= 0) {
257 if (c < NCHARS) symbol[c] = 1;
258 else if (c == DOT) {
259 for (k=0; k<NCHARS; k++)
260 if (k!='\n') symbol[k] = 1;
261 }
262 else if (c == CCL) {
263 nc = chars[right[curpos]];
264 pc = right[curpos] + 1;
265 for (k=0; k<nc; k++) symbol[chars[pc++]] = 1;
266 }
267 else if (c == NCCL) {
268 nc = chars[right[curpos]];
269 for (j = 0; j < NCHARS; j++) {
270 pc = right[curpos] + 1;
271 for (k = 0; k < nc; k++)
272 if (j==chars[pc++]) goto cont;
273 if (j!='\n') symbol[j] = 1;
274 cont:;
275 }
276 }
277 else printf("something's funny\n");
278 }
279 pos++;
280 }
281 for (c=0; c<NCHARS; c++) {
282 if (symbol[c] == 1) { /* nextstate(s,c) */
283 count = icount;
284 for (i=3; i <= line; i++) tmpstat[i] = initstat[i];
285 pos = state[s] + 1;
286 for (i=0; i<num; i++) {
287 curpos = positions[pos];
288 if ((k = name[curpos]) >= 0)
289 if (
290 (k == c)
291 | (k == DOT)
292 | (k == CCL && member(c, right[curpos], 1))
293 | (k == NCCL && member(c, right[curpos], 0))
294 ) {
295 number = positions[foll[curpos]];
296 newpos = foll[curpos] + 1;
297 for (k=0; k<number; k++) {
298 if (tmpstat[positions[newpos]] != 1) {
299 tmpstat[positions[newpos]] = 1;
300 count++;
301 }
302 newpos++;
303 }
304 }
305 pos++;
306 } /* end nextstate */
307 if (notin(n)) {
308 if (n >= NSTATES) overflo();
309 add(state, ++n);
310 if (tmpstat[line] == 1) out[n] = 1;
311 gotofn[s][c] = n;
312 }
313 else {
314 gotofn[s][c] = xstate;
315 }
316 }
317 }
318 }
319}
320
321cstate(v) {
322 register b;
323 if (left[v] == 0) {
324 if (tmpstat[v] != 1) {
325 tmpstat[v] = 1;
326 count++;
327 }
328 return(1);
329 }
330 else if (right[v] == 0) {
331 if (cstate(left[v]) == 0) return (0);
332 else if (name[v] == PLUS) return (1);
333 else return (0);
334 }
335 else if (name[v] == CAT) {
336 if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
337 else return (1);
338 }
339 else { /* name[v] == OR */
340 b = cstate(right[v]);
341 if (cstate(left[v]) == 0 || b == 0) return (0);
342 else return (1);
343 }
344}
345
346
347member(symb, set, torf) {
348 register i, num, pos;
349 num = chars[set];
350 pos = set + 1;
351 for (i=0; i<num; i++)
352 if (symb == chars[pos++]) return (torf);
353 return (!torf);
354}
355
356notin(n) {
357 register i, j, pos;
358 for (i=0; i<=n; i++) {
359 if (positions[state[i]] == count) {
360 pos = state[i] + 1;
361 for (j=0; j < count; j++)
362 if (tmpstat[positions[pos++]] != 1) goto nxt;
363 xstate = i;
364 return (0);
365 }
366 nxt: ;
367 }
368 return (1);
369}
370
371add(array, n) int *array; {
372 register i;
373 if (nxtpos + count > MAXPOS) overflo();
374 array[n] = nxtpos;
375 positions[nxtpos++] = count;
376 for (i=3; i <= line; i++) {
377 if (tmpstat[i] == 1) {
378 positions[nxtpos++] = i;
379 }
380 }
381}
382
383follow(v) int v; {
384 int p;
385 if (v == line) return;
386 p = parent[v];
387 switch(name[p]) {
388 case STAR:
389 case PLUS: cstate(v);
390 follow(p);
391 return;
392
393 case OR:
394 case QUEST: follow(p);
395 return;
396
397 case CAT: if (v == left[p]) {
398 if (cstate(right[p]) == 0) {
399 follow(p);
400 return;
401 }
402 }
403 else follow(p);
404 return;
405 case FINAL: if (tmpstat[line] != 1) {
406 tmpstat[line] = 1;
407 count++;
408 }
409 return;
410 }
411}
412
413
414main(argc, argv)
415char **argv;
416{
417 while (--argc > 0 && (++argv)[0][0]=='-')
418 switch (argv[0][1]) {
419
420 case 's':
421 sflag++;
422 continue;
423
424 case 'h':
425 hflag = 0;
426 continue;
427
428 case 'b':
429 bflag++;
430 continue;
431
432 case 'c':
433 cflag++;
434 continue;
435
436 case 'e':
437 argc--;
438 argv++;
439 goto out;
440
441 case 'f':
442 fflag++;
443 continue;
444
445 case 'l':
446 lflag++;
447 continue;
448
449 case 'n':
450 nflag++;
451 continue;
452
453 case 'v':
454 vflag++;
455 continue;
456
457 default:
458 fprintf(stderr, "egrep: unknown flag\n");
459 continue;
460 }
461out:
462 if (argc<=0)
463 exit(2);
464 if (fflag) {
465 if (freopen(fname = *argv, "r", stdin) == NULL) {
466 fprintf(stderr, "egrep: can't open %s\n", fname);
467 exit(2);
468 }
469 }
470 else input = *argv;
471 argc--;
472 argv++;
473
474 yyparse();
475
476 cfoll(line-1);
477 cgotofn();
478 nfile = argc;
479 if (argc<=0) {
480 if (lflag) exit(1);
481 execute(0);
482 }
483 else while (--argc >= 0) {
484 execute(*argv);
485 argv++;
486 }
487 exit(nsucc == 0);
488}
489
490execute(file)
491char *file;
492{
493 register char *p;
494 register cstat;
495 register ccount;
496 char buf[1024];
497 char *nlp;
498 int istat;
499 if (file) {
500 if ((f = open(file, 0)) < 0) {
501 fprintf(stderr, "egrep: can't open %s\n", file);
502 exit(2);
503 }
504 }
505 else f = 0;
506 ccount = 0;
507 lnum = 1;
508 tln = 0;
509 blkno = 0;
510 p = buf;
511 nlp = p;
512 if ((ccount = read(f,p,512))<=0) goto done;
513 istat = cstat = gotofn[0]['\n'];
514 if (out[cstat]) goto found;
515 for (;;) {
516 cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */
517 if (out[cstat]) {
518 found: for(;;) {
519 if (*p++ == '\n') {
520 if (vflag == 0) {
521 succeed: nsucc = 1;
522 if (cflag) tln++;
523 else if (sflag)
524 ; /* ugh */
525 else if (lflag) {
526 printf("%s\n", file);
527 close(f);
528 return;
529 }
530 else {
531 if (nfile > 1 && hflag) printf("%s:", file);
532 if (bflag) printf("%d:", blkno);
533 if (nflag) printf("%ld:", lnum);
534 if (p <= nlp) {
535 while (nlp < &buf[1024]) putchar(*nlp++);
536 nlp = buf;
537 }
538 while (nlp < p) putchar(*nlp++);
539 }
540 }
541 lnum++;
542 nlp = p;
543 if ((out[(cstat=istat)]) == 0) goto brk2;
544 }
545 cfound:
546 if (--ccount <= 0) {
547 if (p <= &buf[512]) {
548 if ((ccount = read(f, p, 512)) <= 0) goto done;
549 }
550 else if (p == &buf[1024]) {
551 p = buf;
552 if ((ccount = read(f, p, 512)) <= 0) goto done;
553 }
554 else {
555 if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done;
556 }
557 blkno++;
558 }
559 }
560 }
561 if (*p++ == '\n') {
562 if (vflag) goto succeed;
563 else {
564 lnum++;
565 nlp = p;
566 if (out[(cstat=istat)]) goto cfound;
567 }
568 }
569 brk2:
570 if (--ccount <= 0) {
571 if (p <= &buf[512]) {
572 if ((ccount = read(f, p, 512)) <= 0) break;
573 }
574 else if (p == &buf[1024]) {
575 p = buf;
576 if ((ccount = read(f, p, 512)) <= 0) break;
577 }
578 else {
579 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
580 }
581 blkno++;
582 }
583 }
584done: close(f);
585 if (cflag) {
586 if (nfile > 1)
587 printf("%s:", file);
588 printf("%ld\n", tln);
589 }
590}