From: Ken Thompson Date: Mon, 26 Feb 1979 19:51:11 +0000 (-0500) Subject: Research V7 development X-Git-Tag: Bell-32V^2~133 X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/commitdiff_plain/0996f55af86feac0405d90e4aa0aa1ba0481f97b Research V7 development Work on file usr/src/cmd/egrep.y Co-Authored-By: Dennis Ritchie Synthesized-from: v7 --- diff --git a/usr/src/cmd/egrep.y b/usr/src/cmd/egrep.y new file mode 100644 index 0000000000..58b81c6a4d --- /dev/null +++ b/usr/src/cmd/egrep.y @@ -0,0 +1,594 @@ +/* + * egrep -- print lines containing (or not containing) a regular expression + * + * status returns: + * 0 - ok, and some matches + * 1 - ok, but no matches + * 2 - some error + */ +%token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST +%left OR +%left CHAR DOT CCL NCCL '(' +%left CAT +%left STAR PLUS QUEST + +%{ +#include + +#define MAXLIN 350 +#define MAXPOS 4000 +#define NCHARS 128 +#define NSTATES 128 +#define FINAL -1 +char gotofn[NSTATES][NCHARS]; +int state[NSTATES]; +char out[NSTATES]; +int line 1; +int name[MAXLIN]; +int left[MAXLIN]; +int right[MAXLIN]; +int parent[MAXLIN]; +int foll[MAXLIN]; +int positions[MAXPOS]; +char chars[MAXLIN]; +int nxtpos; +int nxtchar 0; +int tmpstat[MAXLIN]; +int initstat[MAXLIN]; +int xstate; +int count; +int icount; +char *input; + +long lnum; +int bflag; +int cflag; +int fflag; +int lflag; +int nflag; +int hflag = 1; +int sflag; +int vflag; +int nfile; +long blkno; +long tln; +int nsucc; + +int f; +int fname; +%} + +%% +s: t + ={ unary(FINAL, $1); + line--; + } + ; +t: b r + ={ $$ = node(CAT, $1, $2); } + | OR b r OR + ={ $$ = node(CAT, $2, $3); } + | OR b r + ={ $$ = node(CAT, $2, $3); } + | b r OR + ={ $$ = node(CAT, $1, $2); } + ; +b: + ={ $$ = enter(DOT); + $$ = unary(STAR, $$); } + ; +r: CHAR + ={ $$ = enter($1); } + | DOT + ={ $$ = enter(DOT); } + | CCL + ={ $$ = cclenter(CCL); } + | NCCL + ={ $$ = cclenter(NCCL); } + ; + +r: r OR r + ={ $$ = node(OR, $1, $3); } + | r r %prec CAT + ={ $$ = node(CAT, $1, $2); } + | r STAR + ={ $$ = unary(STAR, $1); } + | r PLUS + ={ $$ = unary(PLUS, $1); } + | r QUEST + ={ $$ = unary(QUEST, $1); } + | '(' r ')' + ={ $$ = $2; } + | error + ; + +%% +yyerror(s) { + fprintf(stderr, "egrep: %s\n", s); + exit(2); +} + +yylex() { + extern int yylval; + int cclcnt, x; + register char c, d; + switch(c = nextch()) { + case '$': + case '^': c = '\n'; + goto defchar; + case '|': return (OR); + case '*': return (STAR); + case '+': return (PLUS); + case '?': return (QUEST); + case '(': return (c); + case ')': return (c); + case '.': return (DOT); + case '\0': return (0); + case '\n': return (OR); + case '[': + x = CCL; + cclcnt = 0; + count = nxtchar++; + if ((c = nextch()) == '^') { + x = NCCL; + c = nextch(); + } + do { + if (c == '\0') synerror(); + if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { + if ((d = nextch()) != 0) { + c = chars[nxtchar-1]; + while (c < d) { + if (nxtchar >= MAXLIN) overflo(); + chars[nxtchar++] = ++c; + cclcnt++; + } + continue; + } + } + if (nxtchar >= MAXLIN) overflo(); + chars[nxtchar++] = c; + cclcnt++; + } while ((c = nextch()) != ']'); + chars[count] = cclcnt; + return (x); + case '\\': + if ((c = nextch()) == '\0') synerror(); + defchar: + default: yylval = c; return (CHAR); + } +} +nextch() { + register char c; + if (fflag) { + if ((c = getc(stdin)) == EOF) return(0); + } + else c = *input++; + return(c); +} + +synerror() { + fprintf(stderr, "egrep: syntax error\n"); + exit(2); +} + +enter(x) int x; { + if(line >= MAXLIN) overflo(); + name[line] = x; + left[line] = 0; + right[line] = 0; + return(line++); +} + +cclenter(x) int x; { + register linno; + linno = enter(x); + right[linno] = count; + return (linno); +} + +node(x, l, r) { + if(line >= MAXLIN) overflo(); + name[line] = x; + left[line] = l; + right[line] = r; + parent[l] = line; + parent[r] = line; + return(line++); +} + +unary(x, d) { + if(line >= MAXLIN) overflo(); + name[line] = x; + left[line] = d; + right[line] = 0; + parent[d] = line; + return(line++); +} +overflo() { + fprintf(stderr, "egrep: regular expression too long\n"); + exit(2); +} + +cfoll(v) { + register i; + if (left[v] == 0) { + count = 0; + for (i=1; i<=line; i++) tmpstat[i] = 0; + follow(v); + add(foll, v); + } + else if (right[v] == 0) cfoll(left[v]); + else { + cfoll(left[v]); + cfoll(right[v]); + } +} +cgotofn() { + register c, i, k; + int n, s; + char symbol[NCHARS]; + int j, nc, pc, pos; + int curpos, num; + int number, newpos; + count = 0; + for (n=3; n<=line; n++) tmpstat[n] = 0; + if (cstate(line-1)==0) { + tmpstat[line] = 1; + count++; + out[0] = 1; + } + for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; + count--; /*leave out position 1 */ + icount = count; + tmpstat[1] = 0; + add(state, 0); + n = 0; + for (s=0; s<=n; s++) { + if (out[s] == 1) continue; + for (i=0; i= 0) { + if (c < NCHARS) symbol[c] = 1; + else if (c == DOT) { + for (k=0; k= 0) + if ( + (k == c) + | (k == DOT) + | (k == CCL && member(c, right[curpos], 1)) + | (k == NCCL && member(c, right[curpos], 0)) + ) { + number = positions[foll[curpos]]; + newpos = foll[curpos] + 1; + for (k=0; k= NSTATES) overflo(); + add(state, ++n); + if (tmpstat[line] == 1) out[n] = 1; + gotofn[s][c] = n; + } + else { + gotofn[s][c] = xstate; + } + } + } + } +} + +cstate(v) { + register b; + if (left[v] == 0) { + if (tmpstat[v] != 1) { + tmpstat[v] = 1; + count++; + } + return(1); + } + else if (right[v] == 0) { + if (cstate(left[v]) == 0) return (0); + else if (name[v] == PLUS) return (1); + else return (0); + } + else if (name[v] == CAT) { + if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); + else return (1); + } + else { /* name[v] == OR */ + b = cstate(right[v]); + if (cstate(left[v]) == 0 || b == 0) return (0); + else return (1); + } +} + + +member(symb, set, torf) { + register i, num, pos; + num = chars[set]; + pos = set + 1; + for (i=0; i MAXPOS) overflo(); + array[n] = nxtpos; + positions[nxtpos++] = count; + for (i=3; i <= line; i++) { + if (tmpstat[i] == 1) { + positions[nxtpos++] = i; + } + } +} + +follow(v) int v; { + int p; + if (v == line) return; + p = parent[v]; + switch(name[p]) { + case STAR: + case PLUS: cstate(v); + follow(p); + return; + + case OR: + case QUEST: follow(p); + return; + + case CAT: if (v == left[p]) { + if (cstate(right[p]) == 0) { + follow(p); + return; + } + } + else follow(p); + return; + case FINAL: if (tmpstat[line] != 1) { + tmpstat[line] = 1; + count++; + } + return; + } +} + + +main(argc, argv) +char **argv; +{ + while (--argc > 0 && (++argv)[0][0]=='-') + switch (argv[0][1]) { + + case 's': + sflag++; + continue; + + case 'h': + hflag = 0; + continue; + + case 'b': + bflag++; + continue; + + case 'c': + cflag++; + continue; + + case 'e': + argc--; + argv++; + goto out; + + case 'f': + fflag++; + continue; + + case 'l': + lflag++; + continue; + + case 'n': + nflag++; + continue; + + case 'v': + vflag++; + continue; + + default: + fprintf(stderr, "egrep: unknown flag\n"); + continue; + } +out: + if (argc<=0) + exit(2); + if (fflag) { + if (freopen(fname = *argv, "r", stdin) == NULL) { + fprintf(stderr, "egrep: can't open %s\n", fname); + exit(2); + } + } + else input = *argv; + argc--; + argv++; + + yyparse(); + + cfoll(line-1); + cgotofn(); + nfile = argc; + if (argc<=0) { + if (lflag) exit(1); + execute(0); + } + else while (--argc >= 0) { + execute(*argv); + argv++; + } + exit(nsucc == 0); +} + +execute(file) +char *file; +{ + register char *p; + register cstat; + register ccount; + char buf[1024]; + char *nlp; + int istat; + if (file) { + if ((f = open(file, 0)) < 0) { + fprintf(stderr, "egrep: can't open %s\n", file); + exit(2); + } + } + else f = 0; + ccount = 0; + lnum = 1; + tln = 0; + p = buf; + nlp = p; + if ((ccount = read(f,p,512))<=0) goto done; + blkno = ccount; + istat = cstat = gotofn[0]['\n']; + if (out[cstat]) goto found; + for (;;) { + cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */ + if (out[cstat]) { + found: for(;;) { + if (*p++ == '\n') { + if (vflag == 0) { + succeed: nsucc = 1; + if (cflag) tln++; + else if (sflag) + ; /* ugh */ + else if (lflag) { + printf("%s\n", file); + close(f); + return; + } + else { + if (nfile > 1 && hflag) printf("%s:", file); + if (bflag) printf("%ld:", (blkno-ccount-1)/512); + if (nflag) printf("%ld:", lnum); + if (p <= nlp) { + while (nlp < &buf[1024]) putchar(*nlp++); + nlp = buf; + } + while (nlp < p) putchar(*nlp++); + } + } + lnum++; + nlp = p; + if ((out[(cstat=istat)]) == 0) goto brk2; + } + cfound: + if (--ccount <= 0) { + if (p <= &buf[512]) { + if ((ccount = read(f, p, 512)) <= 0) goto done; + } + else if (p == &buf[1024]) { + p = buf; + if ((ccount = read(f, p, 512)) <= 0) goto done; + } + else { + if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done; + } + if(nlp>p && nlp<=p+ccount) + nlp = p+ccount; + blkno += ccount; + } + } + } + if (*p++ == '\n') { + if (vflag) goto succeed; + else { + lnum++; + nlp = p; + if (out[(cstat=istat)]) goto cfound; + } + } + brk2: + if (--ccount <= 0) { + if (p <= &buf[512]) { + if ((ccount = read(f, p, 512)) <= 0) break; + } + else if (p == &buf[1024]) { + p = buf; + if ((ccount = read(f, p, 512)) <= 0) break; + } + else { + if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; + } + if(nlp>p && nlp<=p+ccount) + nlp = p+ccount; + blkno += ccount; + } + } +done: close(f); + if (cflag) { + if (nfile > 1) + printf("%s:", file); + printf("%ld\n", tln); + } +}