convert namelist structure to use unions
[unix-history] / usr / src / usr.bin / grep / old.egrep / old.egrep.y
CommitLineData
17377d12
BJ
1/*
2 * egrep -- print lines containing (or not containing) a regular expression
3 *
4 * status returns:
5 * 0 - ok, and some matches
6 * 1 - ok, but no matches
7 * 2 - some error
8 */
9%token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST
10%left OR
11%left CHAR DOT CCL NCCL '('
12%left CAT
13%left STAR PLUS QUEST
14
15%{
16static char *sccsid = "@(#)old.egrep.y 4.1 (Berkeley) %G%";
17#include <stdio.h>
18
19#define MAXLIN 350
20#define MAXPOS 4000
21#define NCHARS 128
22#define NSTATES 128
23#define FINAL -1
24char gotofn[NSTATES][NCHARS];
25int state[NSTATES];
26char out[NSTATES];
27int line = 1;
28int name[MAXLIN];
29int left[MAXLIN];
30int right[MAXLIN];
31int parent[MAXLIN];
32int foll[MAXLIN];
33int positions[MAXPOS];
34char chars[MAXLIN];
35int nxtpos;
36int nxtchar = 0;
37int tmpstat[MAXLIN];
38int initstat[MAXLIN];
39int xstate;
40int count;
41int icount;
42char *input;
43
44long lnum;
45int bflag;
46int cflag;
47int fflag;
48int lflag;
49int nflag;
50int hflag = 1;
51int sflag;
52int vflag;
53int nfile;
54int blkno;
55long tln;
56int nsucc;
57
58int f;
59int fname;
60%}
61
62%%
63s: t
64 ={ unary(FINAL, $1);
65 line--;
66 }
67 ;
68t: b r
69 ={ $$ = node(CAT, $1, $2); }
70 | OR b r OR
71 ={ $$ = node(CAT, $2, $3); }
72 | OR b r
73 ={ $$ = node(CAT, $2, $3); }
74 | b r OR
75 ={ $$ = node(CAT, $1, $2); }
76 ;
77b:
78 ={ $$ = enter(DOT);
79 $$ = unary(STAR, $$); }
80 ;
81r: CHAR
82 ={ $$ = enter($1); }
83 | DOT
84 ={ $$ = enter(DOT); }
85 | CCL
86 ={ $$ = cclenter(CCL); }
87 | NCCL
88 ={ $$ = cclenter(NCCL); }
89 ;
90
91r: r OR r
92 ={ $$ = node(OR, $1, $3); }
93 | r r %prec CAT
94 ={ $$ = node(CAT, $1, $2); }
95 | r STAR
96 ={ $$ = unary(STAR, $1); }
97 | r PLUS
98 ={ $$ = unary(PLUS, $1); }
99 | r QUEST
100 ={ $$ = unary(QUEST, $1); }
101 | '(' r ')'
102 ={ $$ = $2; }
103 | error
104 ;
105
106%%
107yyerror(s) {
108 fprintf(stderr, "egrep: %s\n", s);
109 exit(2);
110}
111
112yylex() {
113 extern int yylval;
114 int cclcnt, x;
115 register char c, d;
116 switch(c = nextch()) {
117 case '$':
118 case '^': c = '\n';
119 goto defchar;
120 case '|': return (OR);
121 case '*': return (STAR);
122 case '+': return (PLUS);
123 case '?': return (QUEST);
124 case '(': return (c);
125 case ')': return (c);
126 case '.': return (DOT);
127 case '\0': return (0);
128 case '\n': return (OR);
129 case '[':
130 x = CCL;
131 cclcnt = 0;
132 count = nxtchar++;
133 if ((c = nextch()) == '^') {
134 x = NCCL;
135 c = nextch();
136 }
137 do {
138 if (c == '\0') synerror();
139 if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) {
140 if ((d = nextch()) != 0) {
141 c = chars[nxtchar-1];
142 while (c < d) {
143 if (nxtchar >= MAXLIN) overflo();
144 chars[nxtchar++] = ++c;
145 cclcnt++;
146 }
147 continue;
148 }
149 }
150 if (nxtchar >= MAXLIN) overflo();
151 chars[nxtchar++] = c;
152 cclcnt++;
153 } while ((c = nextch()) != ']');
154 chars[count] = cclcnt;
155 return (x);
156 case '\\':
157 if ((c = nextch()) == '\0') synerror();
158 defchar:
159 default: yylval = c; return (CHAR);
160 }
161}
162nextch() {
163 register char c;
164 if (fflag) {
165 if ((c = getc(stdin)) == EOF) return(0);
166 }
167 else c = *input++;
168 return(c);
169}
170
171synerror() {
172 fprintf(stderr, "egrep: syntax error\n");
173 exit(2);
174}
175
176enter(x) int x; {
177 if(line >= MAXLIN) overflo();
178 name[line] = x;
179 left[line] = 0;
180 right[line] = 0;
181 return(line++);
182}
183
184cclenter(x) int x; {
185 register linno;
186 linno = enter(x);
187 right[linno] = count;
188 return (linno);
189}
190
191node(x, l, r) {
192 if(line >= MAXLIN) overflo();
193 name[line] = x;
194 left[line] = l;
195 right[line] = r;
196 parent[l] = line;
197 parent[r] = line;
198 return(line++);
199}
200
201unary(x, d) {
202 if(line >= MAXLIN) overflo();
203 name[line] = x;
204 left[line] = d;
205 right[line] = 0;
206 parent[d] = line;
207 return(line++);
208}
209overflo() {
210 fprintf(stderr, "egrep: regular expression too long\n");
211 exit(2);
212}
213
214cfoll(v) {
215 register i;
216 if (left[v] == 0) {
217 count = 0;
218 for (i=1; i<=line; i++) tmpstat[i] = 0;
219 follow(v);
220 add(foll, v);
221 }
222 else if (right[v] == 0) cfoll(left[v]);
223 else {
224 cfoll(left[v]);
225 cfoll(right[v]);
226 }
227}
228cgotofn() {
229 register c, i, k;
230 int n, s;
231 char symbol[NCHARS];
232 int j, nc, pc, pos;
233 int curpos, num;
234 int number, newpos;
235 count = 0;
236 for (n=3; n<=line; n++) tmpstat[n] = 0;
237 if (cstate(line-1)==0) {
238 tmpstat[line] = 1;
239 count++;
240 out[0] = 1;
241 }
242 for (n=3; n<=line; n++) initstat[n] = tmpstat[n];
243 count--; /*leave out position 1 */
244 icount = count;
245 tmpstat[1] = 0;
246 add(state, 0);
247 n = 0;
248 for (s=0; s<=n; s++) {
249 if (out[s] == 1) continue;
250 for (i=0; i<NCHARS; i++) symbol[i] = 0;
251 num = positions[state[s]];
252 count = icount;
253 for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
254 pos = state[s] + 1;
255 for (i=0; i<num; i++) {
256 curpos = positions[pos];
257 if ((c = name[curpos]) >= 0) {
258 if (c < NCHARS) symbol[c] = 1;
259 else if (c == DOT) {
260 for (k=0; k<NCHARS; k++)
261 if (k!='\n') symbol[k] = 1;
262 }
263 else if (c == CCL) {
264 nc = chars[right[curpos]];
265 pc = right[curpos] + 1;
266 for (k=0; k<nc; k++) symbol[chars[pc++]] = 1;
267 }
268 else if (c == NCCL) {
269 nc = chars[right[curpos]];
270 for (j = 0; j < NCHARS; j++) {
271 pc = right[curpos] + 1;
272 for (k = 0; k < nc; k++)
273 if (j==chars[pc++]) goto cont;
274 if (j!='\n') symbol[j] = 1;
275 cont:;
276 }
277 }
278 else printf("something's funny\n");
279 }
280 pos++;
281 }
282 for (c=0; c<NCHARS; c++) {
283 if (symbol[c] == 1) { /* nextstate(s,c) */
284 count = icount;
285 for (i=3; i <= line; i++) tmpstat[i] = initstat[i];
286 pos = state[s] + 1;
287 for (i=0; i<num; i++) {
288 curpos = positions[pos];
289 if ((k = name[curpos]) >= 0)
290 if (
291 (k == c)
292 | (k == DOT)
293 | (k == CCL && member(c, right[curpos], 1))
294 | (k == NCCL && member(c, right[curpos], 0))
295 ) {
296 number = positions[foll[curpos]];
297 newpos = foll[curpos] + 1;
298 for (k=0; k<number; k++) {
299 if (tmpstat[positions[newpos]] != 1) {
300 tmpstat[positions[newpos]] = 1;
301 count++;
302 }
303 newpos++;
304 }
305 }
306 pos++;
307 } /* end nextstate */
308 if (notin(n)) {
309 if (n >= NSTATES) overflo();
310 add(state, ++n);
311 if (tmpstat[line] == 1) out[n] = 1;
312 gotofn[s][c] = n;
313 }
314 else {
315 gotofn[s][c] = xstate;
316 }
317 }
318 }
319 }
320}
321
322cstate(v) {
323 register b;
324 if (left[v] == 0) {
325 if (tmpstat[v] != 1) {
326 tmpstat[v] = 1;
327 count++;
328 }
329 return(1);
330 }
331 else if (right[v] == 0) {
332 if (cstate(left[v]) == 0) return (0);
333 else if (name[v] == PLUS) return (1);
334 else return (0);
335 }
336 else if (name[v] == CAT) {
337 if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
338 else return (1);
339 }
340 else { /* name[v] == OR */
341 b = cstate(right[v]);
342 if (cstate(left[v]) == 0 || b == 0) return (0);
343 else return (1);
344 }
345}
346
347
348member(symb, set, torf) {
349 register i, num, pos;
350 num = chars[set];
351 pos = set + 1;
352 for (i=0; i<num; i++)
353 if (symb == chars[pos++]) return (torf);
354 return (!torf);
355}
356
357notin(n) {
358 register i, j, pos;
359 for (i=0; i<=n; i++) {
360 if (positions[state[i]] == count) {
361 pos = state[i] + 1;
362 for (j=0; j < count; j++)
363 if (tmpstat[positions[pos++]] != 1) goto nxt;
364 xstate = i;
365 return (0);
366 }
367 nxt: ;
368 }
369 return (1);
370}
371
372add(array, n) int *array; {
373 register i;
374 if (nxtpos + count > MAXPOS) overflo();
375 array[n] = nxtpos;
376 positions[nxtpos++] = count;
377 for (i=3; i <= line; i++) {
378 if (tmpstat[i] == 1) {
379 positions[nxtpos++] = i;
380 }
381 }
382}
383
384follow(v) int v; {
385 int p;
386 if (v == line) return;
387 p = parent[v];
388 switch(name[p]) {
389 case STAR:
390 case PLUS: cstate(v);
391 follow(p);
392 return;
393
394 case OR:
395 case QUEST: follow(p);
396 return;
397
398 case CAT: if (v == left[p]) {
399 if (cstate(right[p]) == 0) {
400 follow(p);
401 return;
402 }
403 }
404 else follow(p);
405 return;
406 case FINAL: if (tmpstat[line] != 1) {
407 tmpstat[line] = 1;
408 count++;
409 }
410 return;
411 }
412}
413
414
415main(argc, argv)
416char **argv;
417{
418 while (--argc > 0 && (++argv)[0][0]=='-')
419 switch (argv[0][1]) {
420
421 case 's':
422 sflag++;
423 continue;
424
425 case 'h':
426 hflag = 0;
427 continue;
428
429 case 'b':
430 bflag++;
431 continue;
432
433 case 'c':
434 cflag++;
435 continue;
436
437 case 'e':
438 argc--;
439 argv++;
440 goto out;
441
442 case 'f':
443 fflag++;
444 continue;
445
446 case 'l':
447 lflag++;
448 continue;
449
450 case 'n':
451 nflag++;
452 continue;
453
454 case 'v':
455 vflag++;
456 continue;
457
458 default:
459 fprintf(stderr, "egrep: unknown flag\n");
460 continue;
461 }
462out:
463 if (argc<=0)
464 exit(2);
465 if (fflag) {
466 if (freopen(fname = *argv, "r", stdin) == NULL) {
467 fprintf(stderr, "egrep: can't open %s\n", fname);
468 exit(2);
469 }
470 }
471 else input = *argv;
472 argc--;
473 argv++;
474
475 yyparse();
476
477 cfoll(line-1);
478 cgotofn();
479 nfile = argc;
480 if (argc<=0) {
481 if (lflag) exit(1);
482 execute(0);
483 }
484 else while (--argc >= 0) {
485 execute(*argv);
486 argv++;
487 }
488 exit(nsucc == 0);
489}
490
491execute(file)
492char *file;
493{
494 register char *p;
495 register cstat;
496 register ccount;
497 char buf[1024];
498 char *nlp;
499 int istat;
500 if (file) {
501 if ((f = open(file, 0)) < 0) {
502 fprintf(stderr, "egrep: can't open %s\n", file);
503 exit(2);
504 }
505 }
506 else f = 0;
507 ccount = 0;
508 lnum = 1;
509 tln = 0;
510 blkno = 0;
511 p = buf;
512 nlp = p;
513 if ((ccount = read(f,p,512))<=0) goto done;
514 istat = cstat = gotofn[0]['\n'];
515 if (out[cstat]) goto found;
516 for (;;) {
517 cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */
518 if (out[cstat]) {
519 found: for(;;) {
520 if (*p++ == '\n') {
521 if (vflag == 0) {
522 succeed: nsucc = 1;
523 if (cflag) tln++;
524 else if (sflag)
525 ; /* ugh */
526 else if (lflag) {
527 printf("%s\n", file);
528 close(f);
529 return;
530 }
531 else {
532 if (nfile > 1 && hflag) printf("%s:", file);
533 if (bflag) printf("%d:", blkno);
534 if (nflag) printf("%ld:", lnum);
535 if (p <= nlp) {
536 while (nlp < &buf[1024]) putchar(*nlp++);
537 nlp = buf;
538 }
539 while (nlp < p) putchar(*nlp++);
540 }
541 }
542 lnum++;
543 nlp = p;
544 if ((out[(cstat=istat)]) == 0) goto brk2;
545 }
546 cfound:
547 if (--ccount <= 0) {
548 if (p <= &buf[512]) {
549 if ((ccount = read(f, p, 512)) <= 0) goto done;
550 }
551 else if (p == &buf[1024]) {
552 p = buf;
553 if ((ccount = read(f, p, 512)) <= 0) goto done;
554 }
555 else {
556 if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done;
557 }
558 blkno++;
559 }
560 }
561 }
562 if (*p++ == '\n') {
563 if (vflag) goto succeed;
564 else {
565 lnum++;
566 nlp = p;
567 if (out[(cstat=istat)]) goto cfound;
568 }
569 }
570 brk2:
571 if (--ccount <= 0) {
572 if (p <= &buf[512]) {
573 if ((ccount = read(f, p, 512)) <= 0) break;
574 }
575 else if (p == &buf[1024]) {
576 p = buf;
577 if ((ccount = read(f, p, 512)) <= 0) break;
578 }
579 else {
580 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
581 }
582 blkno++;
583 }
584 }
585done: close(f);
586 if (cflag) {
587 if (nfile > 1)
588 printf("%s:", file);
589 printf("%ld\n", tln);
590 }
591}