BSD 4 release
[unix-history] / usr / src / cmd / egrep.y
/*
* egrep -- print lines containing (or not containing) a regular expression
*
* status returns:
* 0 - ok, and some matches
* 1 - ok, but no matches
* 2 - some error
*/
%token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST
%left OR
%left CHAR DOT CCL NCCL '('
%left CAT
%left STAR PLUS QUEST
%{
static char *sccsid = "@(#)egrep.y 4.1 (Berkeley) 10/1/80";
#include <stdio.h>
#define MAXLIN 350
#define MAXPOS 4000
#define NCHARS 128
#define NSTATES 128
#define FINAL -1
char gotofn[NSTATES][NCHARS];
int state[NSTATES];
char out[NSTATES];
int line = 1;
int name[MAXLIN];
int left[MAXLIN];
int right[MAXLIN];
int parent[MAXLIN];
int foll[MAXLIN];
int positions[MAXPOS];
char chars[MAXLIN];
int nxtpos;
int nxtchar = 0;
int tmpstat[MAXLIN];
int initstat[MAXLIN];
int xstate;
int count;
int icount;
char *input;
long lnum;
int bflag;
int cflag;
int fflag;
int lflag;
int nflag;
int hflag = 1;
int sflag;
int vflag;
int nfile;
int blkno;
long tln;
int nsucc;
int f;
int fname;
%}
%%
s: t
={ unary(FINAL, $1);
line--;
}
;
t: b r
={ $$ = node(CAT, $1, $2); }
| OR b r OR
={ $$ = node(CAT, $2, $3); }
| OR b r
={ $$ = node(CAT, $2, $3); }
| b r OR
={ $$ = node(CAT, $1, $2); }
;
b:
={ $$ = enter(DOT);
$$ = unary(STAR, $$); }
;
r: CHAR
={ $$ = enter($1); }
| DOT
={ $$ = enter(DOT); }
| CCL
={ $$ = cclenter(CCL); }
| NCCL
={ $$ = cclenter(NCCL); }
;
r: r OR r
={ $$ = node(OR, $1, $3); }
| r r %prec CAT
={ $$ = node(CAT, $1, $2); }
| r STAR
={ $$ = unary(STAR, $1); }
| r PLUS
={ $$ = unary(PLUS, $1); }
| r QUEST
={ $$ = unary(QUEST, $1); }
| '(' r ')'
={ $$ = $2; }
| error
;
%%
yyerror(s) {
fprintf(stderr, "egrep: %s\n", s);
exit(2);
}
yylex() {
extern int yylval;
int cclcnt, x;
register char c, d;
switch(c = nextch()) {
case '$':
case '^': c = '\n';
goto defchar;
case '|': return (OR);
case '*': return (STAR);
case '+': return (PLUS);
case '?': return (QUEST);
case '(': return (c);
case ')': return (c);
case '.': return (DOT);
case '\0': return (0);
case '\n': return (OR);
case '[':
x = CCL;
cclcnt = 0;
count = nxtchar++;
if ((c = nextch()) == '^') {
x = NCCL;
c = nextch();
}
do {
if (c == '\0') synerror();
if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) {
if ((d = nextch()) != 0) {
c = chars[nxtchar-1];
while (c < d) {
if (nxtchar >= MAXLIN) overflo();
chars[nxtchar++] = ++c;
cclcnt++;
}
continue;
}
}
if (nxtchar >= MAXLIN) overflo();
chars[nxtchar++] = c;
cclcnt++;
} while ((c = nextch()) != ']');
chars[count] = cclcnt;
return (x);
case '\\':
if ((c = nextch()) == '\0') synerror();
defchar:
default: yylval = c; return (CHAR);
}
}
nextch() {
register char c;
if (fflag) {
if ((c = getc(stdin)) == EOF) return(0);
}
else c = *input++;
return(c);
}
synerror() {
fprintf(stderr, "egrep: syntax error\n");
exit(2);
}
enter(x) int x; {
if(line >= MAXLIN) overflo();
name[line] = x;
left[line] = 0;
right[line] = 0;
return(line++);
}
cclenter(x) int x; {
register linno;
linno = enter(x);
right[linno] = count;
return (linno);
}
node(x, l, r) {
if(line >= MAXLIN) overflo();
name[line] = x;
left[line] = l;
right[line] = r;
parent[l] = line;
parent[r] = line;
return(line++);
}
unary(x, d) {
if(line >= MAXLIN) overflo();
name[line] = x;
left[line] = d;
right[line] = 0;
parent[d] = line;
return(line++);
}
overflo() {
fprintf(stderr, "egrep: regular expression too long\n");
exit(2);
}
cfoll(v) {
register i;
if (left[v] == 0) {
count = 0;
for (i=1; i<=line; i++) tmpstat[i] = 0;
follow(v);
add(foll, v);
}
else if (right[v] == 0) cfoll(left[v]);
else {
cfoll(left[v]);
cfoll(right[v]);
}
}
cgotofn() {
register c, i, k;
int n, s;
char symbol[NCHARS];
int j, nc, pc, pos;
int curpos, num;
int number, newpos;
count = 0;
for (n=3; n<=line; n++) tmpstat[n] = 0;
if (cstate(line-1)==0) {
tmpstat[line] = 1;
count++;
out[0] = 1;
}
for (n=3; n<=line; n++) initstat[n] = tmpstat[n];
count--; /*leave out position 1 */
icount = count;
tmpstat[1] = 0;
add(state, 0);
n = 0;
for (s=0; s<=n; s++) {
if (out[s] == 1) continue;
for (i=0; i<NCHARS; i++) symbol[i] = 0;
num = positions[state[s]];
count = icount;
for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
pos = state[s] + 1;
for (i=0; i<num; i++) {
curpos = positions[pos];
if ((c = name[curpos]) >= 0) {
if (c < NCHARS) symbol[c] = 1;
else if (c == DOT) {
for (k=0; k<NCHARS; k++)
if (k!='\n') symbol[k] = 1;
}
else if (c == CCL) {
nc = chars[right[curpos]];
pc = right[curpos] + 1;
for (k=0; k<nc; k++) symbol[chars[pc++]] = 1;
}
else if (c == NCCL) {
nc = chars[right[curpos]];
for (j = 0; j < NCHARS; j++) {
pc = right[curpos] + 1;
for (k = 0; k < nc; k++)
if (j==chars[pc++]) goto cont;
if (j!='\n') symbol[j] = 1;
cont:;
}
}
else printf("something's funny\n");
}
pos++;
}
for (c=0; c<NCHARS; c++) {
if (symbol[c] == 1) { /* nextstate(s,c) */
count = icount;
for (i=3; i <= line; i++) tmpstat[i] = initstat[i];
pos = state[s] + 1;
for (i=0; i<num; i++) {
curpos = positions[pos];
if ((k = name[curpos]) >= 0)
if (
(k == c)
| (k == DOT)
| (k == CCL && member(c, right[curpos], 1))
| (k == NCCL && member(c, right[curpos], 0))
) {
number = positions[foll[curpos]];
newpos = foll[curpos] + 1;
for (k=0; k<number; k++) {
if (tmpstat[positions[newpos]] != 1) {
tmpstat[positions[newpos]] = 1;
count++;
}
newpos++;
}
}
pos++;
} /* end nextstate */
if (notin(n)) {
if (n >= NSTATES) overflo();
add(state, ++n);
if (tmpstat[line] == 1) out[n] = 1;
gotofn[s][c] = n;
}
else {
gotofn[s][c] = xstate;
}
}
}
}
}
cstate(v) {
register b;
if (left[v] == 0) {
if (tmpstat[v] != 1) {
tmpstat[v] = 1;
count++;
}
return(1);
}
else if (right[v] == 0) {
if (cstate(left[v]) == 0) return (0);
else if (name[v] == PLUS) return (1);
else return (0);
}
else if (name[v] == CAT) {
if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
else return (1);
}
else { /* name[v] == OR */
b = cstate(right[v]);
if (cstate(left[v]) == 0 || b == 0) return (0);
else return (1);
}
}
member(symb, set, torf) {
register i, num, pos;
num = chars[set];
pos = set + 1;
for (i=0; i<num; i++)
if (symb == chars[pos++]) return (torf);
return (!torf);
}
notin(n) {
register i, j, pos;
for (i=0; i<=n; i++) {
if (positions[state[i]] == count) {
pos = state[i] + 1;
for (j=0; j < count; j++)
if (tmpstat[positions[pos++]] != 1) goto nxt;
xstate = i;
return (0);
}
nxt: ;
}
return (1);
}
add(array, n) int *array; {
register i;
if (nxtpos + count > MAXPOS) overflo();
array[n] = nxtpos;
positions[nxtpos++] = count;
for (i=3; i <= line; i++) {
if (tmpstat[i] == 1) {
positions[nxtpos++] = i;
}
}
}
follow(v) int v; {
int p;
if (v == line) return;
p = parent[v];
switch(name[p]) {
case STAR:
case PLUS: cstate(v);
follow(p);
return;
case OR:
case QUEST: follow(p);
return;
case CAT: if (v == left[p]) {
if (cstate(right[p]) == 0) {
follow(p);
return;
}
}
else follow(p);
return;
case FINAL: if (tmpstat[line] != 1) {
tmpstat[line] = 1;
count++;
}
return;
}
}
main(argc, argv)
char **argv;
{
while (--argc > 0 && (++argv)[0][0]=='-')
switch (argv[0][1]) {
case 's':
sflag++;
continue;
case 'h':
hflag = 0;
continue;
case 'b':
bflag++;
continue;
case 'c':
cflag++;
continue;
case 'e':
argc--;
argv++;
goto out;
case 'f':
fflag++;
continue;
case 'l':
lflag++;
continue;
case 'n':
nflag++;
continue;
case 'v':
vflag++;
continue;
default:
fprintf(stderr, "egrep: unknown flag\n");
continue;
}
out:
if (argc<=0)
exit(2);
if (fflag) {
if (freopen(fname = *argv, "r", stdin) == NULL) {
fprintf(stderr, "egrep: can't open %s\n", fname);
exit(2);
}
}
else input = *argv;
argc--;
argv++;
yyparse();
cfoll(line-1);
cgotofn();
nfile = argc;
if (argc<=0) {
if (lflag) exit(1);
execute(0);
}
else while (--argc >= 0) {
execute(*argv);
argv++;
}
exit(nsucc == 0);
}
execute(file)
char *file;
{
register char *p;
register cstat;
register ccount;
char buf[1024];
char *nlp;
int istat;
if (file) {
if ((f = open(file, 0)) < 0) {
fprintf(stderr, "egrep: can't open %s\n", file);
exit(2);
}
}
else f = 0;
ccount = 0;
lnum = 1;
tln = 0;
blkno = 0;
p = buf;
nlp = p;
if ((ccount = read(f,p,512))<=0) goto done;
istat = cstat = gotofn[0]['\n'];
if (out[cstat]) goto found;
for (;;) {
cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */
if (out[cstat]) {
found: for(;;) {
if (*p++ == '\n') {
if (vflag == 0) {
succeed: nsucc = 1;
if (cflag) tln++;
else if (sflag)
; /* ugh */
else if (lflag) {
printf("%s\n", file);
close(f);
return;
}
else {
if (nfile > 1 && hflag) printf("%s:", file);
if (bflag) printf("%d:", blkno);
if (nflag) printf("%ld:", lnum);
if (p <= nlp) {
while (nlp < &buf[1024]) putchar(*nlp++);
nlp = buf;
}
while (nlp < p) putchar(*nlp++);
}
}
lnum++;
nlp = p;
if ((out[(cstat=istat)]) == 0) goto brk2;
}
cfound:
if (--ccount <= 0) {
if (p <= &buf[512]) {
if ((ccount = read(f, p, 512)) <= 0) goto done;
}
else if (p == &buf[1024]) {
p = buf;
if ((ccount = read(f, p, 512)) <= 0) goto done;
}
else {
if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done;
}
blkno++;
}
}
}
if (*p++ == '\n') {
if (vflag) goto succeed;
else {
lnum++;
nlp = p;
if (out[(cstat=istat)]) goto cfound;
}
}
brk2:
if (--ccount <= 0) {
if (p <= &buf[512]) {
if ((ccount = read(f, p, 512)) <= 0) break;
}
else if (p == &buf[1024]) {
p = buf;
if ((ccount = read(f, p, 512)) <= 0) break;
}
else {
if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
}
blkno++;
}
}
done: close(f);
if (cflag) {
if (nfile > 1)
printf("%s:", file);
printf("%ld\n", tln);
}
}