add man page, cleanup
[unix-history] / usr / src / old / awk / b.c
CommitLineData
b803c326 1#ifndef lint
39a6c341 2static char sccsid[] = "@(#)b.c 4.3 %G%";
b803c326 3#endif
8351eafd
KM
4
5#include "awk.def"
6#include "stdio.h"
7#include "awk.h"
8
9extern node *op2();
10extern struct fa *cgotofn();
11#define MAXLIN 256
12#define NCHARS 128
13#define NSTATES 256
14
15#define type(v) v->nobj
16#define left(v) v->narg[0]
17#define right(v) v->narg[1]
18#define parent(v) v->nnext
19
20#define LEAF case CCL: case NCCL: case CHAR: case DOT:
21#define UNARY case FINAL: case STAR: case PLUS: case QUEST:
22
23/* encoding in tree nodes:
24 leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value
25 unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
26 binary (CAT, OR): left and right are children
27 parent contains pointer to parent
28*/
29
30struct fa {
31 int cch;
32 struct fa *st;
33};
34
35int *state[NSTATES];
36int *foll[MAXLIN];
37char chars[MAXLIN];
38int setvec[MAXLIN];
39node *point[MAXLIN];
40
41int setcnt;
42int line;
39a6c341 43int maxfoll; /* index of highest foll[] entry set by cfoll() */
8351eafd
KM
44
45
46struct fa *makedfa(p) /* returns dfa for tree pointed to by p */
47node *p;
48{
49 node *p1;
50 struct fa *fap;
51 p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p);
52 /* put DOT STAR in front of reg. exp. */
53 p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */
54
55 line = 0;
56 penter(p1); /* enter parent pointers and leaf indices */
57 point[line] = p1; /* FINAL node */
58 setvec[0] = 1; /* for initial DOT STAR */
59 cfoll(p1); /* set up follow sets */
60 fap = cgotofn();
61 freetr(p1); /* add this when alloc works */
62 return(fap);
63}
64
65penter(p) /* set up parent pointers and leaf indices */
66node *p;
67{
68 switch(type(p)) {
69 LEAF
70 left(p) = (node *) line;
71 point[line++] = p;
72 break;
73 UNARY
74 penter(left(p));
75 parent(left(p)) = p;
76 break;
77 case CAT:
78 case OR:
79 penter(left(p));
80 penter(right(p));
81 parent(left(p)) = p;
82 parent(right(p)) = p;
83 break;
84 default:
85 error(FATAL, "unknown type %d in penter\n", type(p));
86 break;
87 }
88}
89
90freetr(p) /* free parse tree and follow sets */
91node *p;
92{
93 switch(type(p)) {
94 LEAF
39a6c341 95 foll_free((int) left(p));
8351eafd
KM
96 xfree(p);
97 break;
98 UNARY
99 freetr(left(p));
100 xfree(p);
101 break;
102 case CAT:
103 case OR:
104 freetr(left(p));
105 freetr(right(p));
106 xfree(p);
107 break;
108 default:
109 error(FATAL, "unknown type %d in freetr", type(p));
110 break;
111 }
112}
113char *cclenter(p)
114register char *p;
115{
116 register i, c;
117 char *op;
118
119 op = p;
120 i = 0;
121 while ((c = *p++) != 0) {
122 if (c == '-' && i > 0 && chars[i-1] != 0) {
123 if (*p != 0) {
124 c = chars[i-1];
125 while (c < *p) {
126 if (i >= MAXLIN)
127 overflo();
128 chars[i++] = ++c;
129 }
130 p++;
131 continue;
132 }
133 }
134 if (i >= MAXLIN)
135 overflo();
136 chars[i++] = c;
137 }
138 chars[i++] = '\0';
139 dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL);
140 xfree(op);
141 return(tostring(chars));
142}
143
144overflo()
145{
146 error(FATAL, "regular expression too long\n");
147}
148
149cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */
150register node *v;
151{
152 register i;
153 int prev;
154 int *add();
155
39a6c341 156 maxfoll = -1;
8351eafd
KM
157 switch(type(v)) {
158 LEAF
159 setcnt = 0;
160 for (i=1; i<=line; i++)
161 setvec[i] = 0;
162 follow(v);
163 if (notin(foll, ( (int) left(v))-1, &prev)) {
164 foll[(int) left(v)] = add(setcnt);
165 }
166 else
167 foll[ (int) left(v)] = foll[prev];
39a6c341
KM
168 if ((int)left(v) > maxfoll)
169 maxfoll = (int)left(v);
8351eafd
KM
170 break;
171 UNARY
172 cfoll(left(v));
173 break;
174 case CAT:
175 case OR:
176 cfoll(left(v));
177 cfoll(right(v));
178 break;
179 default:
180 error(FATAL, "unknown type %d in cfoll", type(v));
181 }
182}
183
184first(p) /* collects initially active leaves of p into setvec */
185register node *p; /* returns 0 or 1 depending on whether p matches empty string */
186{
187 register b;
188
189 switch(type(p)) {
190 LEAF
191 if (setvec[(int) left(p)] != 1) {
192 setvec[(int) left(p)] = 1;
193 setcnt++;
194 }
195 if (type(p) == CCL && (*(char *) right(p)) == '\0')
196 return(0); /* empty CCL */
197 else return(1);
198 case FINAL:
199 case PLUS:
200 if (first(left(p)) == 0) return(0);
201 return(1);
202 case STAR:
203 case QUEST:
204 first(left(p));
205 return(0);
206 case CAT:
207 if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
208 return(1);
209 case OR:
210 b = first(right(p));
211 if (first(left(p)) == 0 || b == 0) return(0);
212 return(1);
213 }
214 error(FATAL, "unknown type %d in first\n", type(p));
215 return(-1);
216}
217
218follow(v)
219node *v; /* collects leaves that can follow v into setvec */
220{
221 node *p;
222
223 if (type(v) == FINAL)
224 return;
225 p = parent(v);
226 switch (type(p)) {
227 case STAR:
228 case PLUS: first(v);
229 follow(p);
230 return;
231
232 case OR:
233 case QUEST: follow(p);
234 return;
235
236 case CAT: if (v == left(p)) { /* v is left child of p */
237 if (first(right(p)) == 0) {
238 follow(p);
239 return;
240 }
241 }
242 else /* v is right child */
243 follow(p);
244 return;
245 case FINAL: if (setvec[line] != 1) {
246 setvec[line] = 1;
247 setcnt++;
248 }
249 return;
250 }
251}
252
253member(c, s) /* is c in s? */
254register char c, *s;
255{
256 while (*s)
257 if (c == *s++)
258 return(1);
259 return(0);
260}
261
262notin(array, n, prev) /* is setvec in array[0] thru array[n]? */
263int **array;
264int *prev; {
265 register i, j;
266 int *ptr;
267 for (i=0; i<=n; i++) {
268 ptr = array[i];
269 if (*ptr == setcnt) {
270 for (j=0; j < setcnt; j++)
271 if (setvec[*(++ptr)] != 1) goto nxt;
272 *prev = i;
273 return(0);
274 }
275 nxt: ;
276 }
277 return(1);
278}
279
280int *add(n) { /* remember setvec */
281 int *ptr, *p;
282 register i;
283 if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL)
284 overflo();
285 *ptr = n;
286 dprintf("add(%d)\n", n, NULL, NULL);
287 for (i=1; i <= line; i++)
288 if (setvec[i] == 1) {
289 *(++ptr) = i;
290 dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
291 }
292 dprintf("\n", NULL, NULL, NULL);
293 return(p);
294}
295
296struct fa *cgotofn()
297{
298 register i, k;
299 register int *ptr;
300 char c;
301 char *p;
302 node *cp;
303 int j, n, s, ind, numtrans;
304 int finflg;
305 int curpos, num, prev;
306 struct fa *where[NSTATES];
307
308 int fatab[257];
309 struct fa *pfa;
310
311 char index[MAXLIN];
312 char iposns[MAXLIN];
313 int sposns[MAXLIN];
314 int spmax, spinit;
315
316 char symbol[NCHARS];
317 char isyms[NCHARS];
318 char ssyms[NCHARS];
319 int ssmax, ssinit;
320
321 for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0;
322 for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0;
323 setcnt = 0;
324 /* compute initial positions and symbols of state 0 */
325 ssmax = 0;
326 ptr = state[0] = foll[0];
327 spinit = *ptr;
328 for (i=0; i<spinit; i++) {
329 curpos = *(++ptr);
330 sposns[i] = curpos;
331 iposns[curpos] = 1;
332 cp = point[curpos];
333 dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
334 switch (type(cp)) {
335 case CHAR:
336 k = (int) right(cp);
337 if (isyms[k] != 1) {
338 isyms[k] = 1;
339 ssyms[ssmax++] = k;
340 }
341 break;
342 case DOT:
343 for (k=1; k<NCHARS; k++) {
344 if (k != HAT) {
345 if (isyms[k] != 1) {
346 isyms[k] = 1;
347 ssyms[ssmax++] = k;
348 }
349 }
350 }
351 break;
352 case CCL:
353 for (p = (char *) right(cp); *p; p++) {
354 if (*p != HAT) {
355 if (isyms[*p] != 1) {
356 isyms[*p] = 1;
357 ssyms[ssmax++] = *p;
358 }
359 }
360 }
361 break;
362 case NCCL:
363 for (k=1; k<NCHARS; k++) {
364 if (k != HAT && !member(k, (char *) right(cp))) {
365 if (isyms[k] != 1) {
366 isyms[k] = 1;
367 ssyms[ssmax++] = k;
368 }
369 }
370 }
371 }
372 }
373 ssinit = ssmax;
374 n = 0;
375 for (s=0; s<=n; s++) {
376 dprintf("s = %d\n", s, NULL, NULL);
377 ind = 0;
378 numtrans = 0;
379 finflg = 0;
380 if (*(state[s] + *state[s]) == line) { /* s final? */
381 finflg = 1;
382 goto tenter;
383 }
384 spmax = spinit;
385 ssmax = ssinit;
386 ptr = state[s];
387 num = *ptr;
388 for (i=0; i<num; i++) {
389 curpos = *(++ptr);
390 if (iposns[curpos] != 1 && index[curpos] != 1) {
391 index[curpos] = 1;
392 sposns[spmax++] = curpos;
393 }
394 cp = point[curpos];
395 switch (type(cp)) {
396 case CHAR:
397 k = (int) right(cp);
398 if (isyms[k] == 0 && symbol[k] == 0) {
399 symbol[k] = 1;
400 ssyms[ssmax++] = k;
401 }
402 break;
403 case DOT:
404 for (k=1; k<NCHARS; k++) {
405 if (k != HAT) {
406 if (isyms[k] == 0 && symbol[k] == 0) {
407 symbol[k] = 1;
408 ssyms[ssmax++] = k;
409 }
410 }
411 }
412 break;
413 case CCL:
414 for (p = (char *) right(cp); *p; p++) {
415 if (*p != HAT) {
416 if (isyms[*p] == 0 && symbol[*p] == 0) {
417 symbol[*p] = 1;
418 ssyms[ssmax++] = *p;
419 }
420 }
421 }
422 break;
423 case NCCL:
424 for (k=1; k<NCHARS; k++) {
425 if (k != HAT && !member(k, (char *) right(cp))) {
426 if (isyms[k] == 0 && symbol[k] == 0) {
427 symbol[k] = 1;
428 ssyms[ssmax++] = k;
429 }
430 }
431 }
432 }
433 }
434 for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */
435 c = ssyms[j];
436 symbol[c] = 0;
437 setcnt = 0;
438 for (k=0; k<=line; k++) setvec[k] = 0;
439 for (i=0; i<spmax; i++) {
440 index[sposns[i]] = 0;
441 cp = point[sposns[i]];
442 if ((k = type(cp)) != FINAL)
443 if (k == CHAR && c == (int) right(cp)
444 || k == DOT
445 || k == CCL && member(c, (char *) right(cp))
446 || k == NCCL && !member(c, (char *) right(cp))) {
447 ptr = foll[sposns[i]];
448 num = *ptr;
449 for (k=0; k<num; k++) {
450 if (setvec[*(++ptr)] != 1
451 && iposns[*ptr] != 1) {
452 setvec[*ptr] = 1;
453 setcnt++;
454 }
455 }
456 }
457 } /* end nextstate */
458 if (notin(state, n, &prev)) {
459 if (n >= NSTATES) {
460 dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
461 overflo();
462 }
463 state[++n] = add(setcnt);
464 dprintf(" delta(%d,%o) = %d", s,c,n);
465 dprintf(", ind = %d\n", ind+1, NULL, NULL);
466 fatab[++ind] = c;
467 fatab[++ind] = n;
468 numtrans++;
469 }
470 else {
471 if (prev != 0) {
472 dprintf(" delta(%d,%o) = %d", s,c,prev);
473 dprintf(", ind = %d\n", ind+1, NULL, NULL);
474 fatab[++ind] = c;
475 fatab[++ind] = prev;
476 numtrans++;
477 }
478 }
479 }
480 tenter:
481 if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL)
482 overflo();
483 where[s] = pfa;
484 if (finflg)
485 pfa->cch = -1; /* s is a final state */
486 else
487 pfa->cch = numtrans;
488 pfa->st = 0;
489 for (i=1, pfa += 1; i<=numtrans; i++, pfa++) {
490 pfa->cch = fatab[2*i-1];
491 pfa->st = (struct fa *) fatab[2*i];
492 }
493 }
494 for (i=0; i<=n; i++) {
39a6c341
KM
495 /* N.b. state[0] == foll[0], not separately allocated */
496 if (i>0)
497 xfree(state[i]); /* free state[i] */
8351eafd
KM
498 pfa = where[i];
499 pfa->st = where[0];
500 dprintf("state %d: (%o)\n", i, pfa, NULL);
501 dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL);
502 for (k=1; k<=pfa->cch; k++) {
503 (pfa+k)->st = where[ (int) (pfa+k)->st];
504 dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL);
505 }
506 }
507 pfa = where[0];
508 if ((num = pfa->cch) < 0)
509 return(where[0]);
510 for (pfa += num; num; num--, pfa--)
511 if (pfa->cch == HAT) {
512 return(pfa->st);
513 }
514 return(where[0]);
515}
516
517match(pfa, p)
518register struct fa *pfa;
519register char *p;
520{
521 register count;
522 char c;
523 if (p == 0) return(0);
524 if (pfa->cch == 1) { /* fast test for first character, if possible */
525 c = (++pfa)->cch;
526 do
527 if (c == *p) {
528 p++;
529 pfa = pfa->st;
530 goto adv;
531 }
532 while (*p++ != 0);
533 return(0);
534 }
535 adv: if ((count = pfa->cch) < 0) return(1);
536 do {
537 for (pfa += count; count; count--, pfa--)
538 if (pfa->cch == *p) {
539 break;
540 }
541 pfa = pfa->st;
542 if ((count = pfa->cch) < 0) return(1);
543 } while(*p++ != 0);
544 return(0);
545}
39a6c341
KM
546
547/*
548 * Free foll[i], taking into account identical foll[] entries.
549 * This is necessary because cfoll() uses the same physical follow set for
550 * several foll[] entries when the set is identical. Called by freetr().
551 */
552foll_free(i)
553int i;
554{
555 register int j;
556 int *p = foll[i];
557 if (p==NULL) return;
558 for (j=0; j<=maxfoll; j++)
559 if (foll[j]==p) foll[j]=NULL;
560 xfree(p);
561}