Bell 32V development
[unix-history] / usr / src / cmd / awk / b.c
CommitLineData
9756ffef
TL
1#include "awk.def"
2#include "stdio.h"
3#include "awk.h"
4
5extern node *op2();
6extern struct fa *cgotofn();
7#define MAXLIN 256
8#define NCHARS 128
9#define NSTATES 256
10
11#define type(v) v->nobj
12#define left(v) v->narg[0]
13#define right(v) v->narg[1]
14#define parent(v) v->nnext
15
16#define LEAF case CCL: case NCCL: case CHAR: case DOT:
17#define UNARY case FINAL: case STAR: case PLUS: case QUEST:
18
19/* encoding in tree nodes:
20 leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value
21 unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
22 binary (CAT, OR): left and right are children
23 parent contains pointer to parent
24*/
25
26struct fa {
27 int cch;
28 struct fa *st;
29};
30
31int *state[NSTATES];
32int *foll[MAXLIN];
33char chars[MAXLIN];
34int setvec[MAXLIN];
35node *point[MAXLIN];
36
37int setcnt;
38int line;
39
40
41struct fa *makedfa(p) /* returns dfa for tree pointed to by p */
42node *p;
43{
44 node *p1;
45 struct fa *fap;
46 p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p);
47 /* put DOT STAR in front of reg. exp. */
48 p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */
49
50 line = 0;
51 penter(p1); /* enter parent pointers and leaf indices */
52 point[line] = p1; /* FINAL node */
53 setvec[0] = 1; /* for initial DOT STAR */
54 cfoll(p1); /* set up follow sets */
55 fap = cgotofn();
56 freetr(p1); /* add this when alloc works */
57 return(fap);
58}
59
60penter(p) /* set up parent pointers and leaf indices */
61node *p;
62{
63 switch(type(p)) {
64 LEAF
65 left(p) = (node *) line;
66 point[line++] = p;
67 break;
68 UNARY
69 penter(left(p));
70 parent(left(p)) = p;
71 break;
72 case CAT:
73 case OR:
74 penter(left(p));
75 penter(right(p));
76 parent(left(p)) = p;
77 parent(right(p)) = p;
78 break;
79 default:
80 error(FATAL, "unknown type %d in penter\n", type(p));
81 break;
82 }
83}
84
85freetr(p) /* free parse tree and follow sets */
86node *p;
87{
88 switch(type(p)) {
89 LEAF
90 xfree(foll[(int) left(p)]);
91 xfree(p);
92 break;
93 UNARY
94 freetr(left(p));
95 xfree(p);
96 break;
97 case CAT:
98 case OR:
99 freetr(left(p));
100 freetr(right(p));
101 xfree(p);
102 break;
103 default:
104 error(FATAL, "unknown type %d in freetr", type(p));
105 break;
106 }
107}
108char *cclenter(p)
109register char *p;
110{
111 register i, c;
112 char *op;
113
114 op = p;
115 i = 0;
116 while ((c = *p++) != 0) {
117 if (c == '-' && i > 0 && chars[i-1] != 0) {
118 if (*p != 0) {
119 c = chars[i-1];
120 while (c < *p) {
121 if (i >= MAXLIN)
122 overflo();
123 chars[i++] = ++c;
124 }
125 p++;
126 continue;
127 }
128 }
129 if (i >= MAXLIN)
130 overflo();
131 chars[i++] = c;
132 }
133 chars[i++] = '\0';
134 dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL);
135 xfree(op);
136 return(tostring(chars));
137}
138
139overflo()
140{
141 error(FATAL, "regular expression too long\n");
142}
143
144cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */
145register node *v;
146{
147 register i;
148 int prev;
149 int *add();
150
151 switch(type(v)) {
152 LEAF
153 setcnt = 0;
154 for (i=1; i<=line; i++)
155 setvec[i] = 0;
156 follow(v);
157 if (notin(foll, ( (int) left(v))-1, &prev)) {
158 foll[(int) left(v)] = add(setcnt);
159 }
160 else
161 foll[ (int) left(v)] = foll[prev];
162 break;
163 UNARY
164 cfoll(left(v));
165 break;
166 case CAT:
167 case OR:
168 cfoll(left(v));
169 cfoll(right(v));
170 break;
171 default:
172 error(FATAL, "unknown type %d in cfoll", type(v));
173 }
174}
175
176first(p) /* collects initially active leaves of p into setvec */
177register node *p; /* returns 0 or 1 depending on whether p matches empty string */
178{
179 register b;
180
181 switch(type(p)) {
182 LEAF
183 if (setvec[(int) left(p)] != 1) {
184 setvec[(int) left(p)] = 1;
185 setcnt++;
186 }
187 if (type(p) == CCL && (*(char *) right(p)) == '\0')
188 return(0); /* empty CCL */
189 else return(1);
190 case FINAL:
191 case PLUS:
192 if (first(left(p)) == 0) return(0);
193 return(1);
194 case STAR:
195 case QUEST:
196 first(left(p));
197 return(0);
198 case CAT:
199 if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
200 return(1);
201 case OR:
202 b = first(right(p));
203 if (first(left(p)) == 0 || b == 0) return(0);
204 return(1);
205 }
206 error(FATAL, "unknown type %d in first\n", type(p));
207 return(-1);
208}
209
210follow(v)
211node *v; /* collects leaves that can follow v into setvec */
212{
213 node *p;
214
215 if (type(v) == FINAL)
216 return;
217 p = parent(v);
218 switch (type(p)) {
219 case STAR:
220 case PLUS: first(v);
221 follow(p);
222 return;
223
224 case OR:
225 case QUEST: follow(p);
226 return;
227
228 case CAT: if (v == left(p)) { /* v is left child of p */
229 if (first(right(p)) == 0) {
230 follow(p);
231 return;
232 }
233 }
234 else /* v is right child */
235 follow(p);
236 return;
237 case FINAL: if (setvec[line] != 1) {
238 setvec[line] = 1;
239 setcnt++;
240 }
241 return;
242 }
243}
244
245member(c, s) /* is c in s? */
246register char c, *s;
247{
248 while (*s)
249 if (c == *s++)
250 return(1);
251 return(0);
252}
253
254notin(array, n, prev) /* is setvec in array[0] thru array[n]? */
255int **array;
256int *prev; {
257 register i, j;
258 int *ptr;
259 for (i=0; i<=n; i++) {
260 ptr = array[i];
261 if (*ptr == setcnt) {
262 for (j=0; j < setcnt; j++)
263 if (setvec[*(++ptr)] != 1) goto nxt;
264 *prev = i;
265 return(0);
266 }
267 nxt: ;
268 }
269 return(1);
270}
271
272int *add(n) { /* remember setvec */
273 int *ptr, *p;
274 register i;
275 if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL)
276 overflo();
277 *ptr = n;
278 dprintf("add(%d)\n", n, NULL, NULL);
279 for (i=1; i <= line; i++)
280 if (setvec[i] == 1) {
281 *(++ptr) = i;
282 dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
283 }
284 dprintf("\n", NULL, NULL, NULL);
285 return(p);
286}
287
288struct fa *cgotofn()
289{
290 register i, k;
291 register int *ptr;
292 char c;
293 char *p;
294 node *cp;
295 int j, n, s, ind, numtrans;
296 int finflg;
297 int curpos, num, prev;
298 struct fa *where[NSTATES];
299
300 int fatab[257];
301 struct fa *pfa;
302
303 char index[MAXLIN];
304 char iposns[MAXLIN];
305 int sposns[MAXLIN];
306 int spmax, spinit;
307
308 char symbol[NCHARS];
309 char isyms[NCHARS];
310 char ssyms[NCHARS];
311 int ssmax, ssinit;
312
313 for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0;
314 for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0;
315 setcnt = 0;
316 state[0] = add(0);
317 /* compute initial positions and symbols of state 0 */
318 ssmax = 0;
319 ptr = foll[0];
320 spinit = *ptr;
321 for (i=0; i<spinit; i++) {
322 curpos = *(++ptr);
323 sposns[i] = curpos;
324 iposns[curpos] = 1;
325 cp = point[curpos];
326 dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
327 switch (type(cp)) {
328 case CHAR:
329 k = (int) right(cp);
330 if (isyms[k] != 1) {
331 isyms[k] = 1;
332 ssyms[ssmax++] = k;
333 }
334 break;
335 case DOT:
336 for (k=1; k<NCHARS; k++) {
337 if (k != HAT) {
338 if (isyms[k] != 1) {
339 isyms[k] = 1;
340 ssyms[ssmax++] = k;
341 }
342 }
343 }
344 break;
345 case CCL:
346 for (p = (char *) right(cp); *p; p++) {
347 if (*p != HAT) {
348 if (isyms[*p] != 1) {
349 isyms[*p] = 1;
350 ssyms[ssmax++] = *p;
351 }
352 }
353 }
354 break;
355 case NCCL:
356 for (k=1; k<NCHARS; k++) {
357 if (k != HAT && !member(k, (char *) right(cp))) {
358 if (isyms[k] != 1) {
359 isyms[k] = 1;
360 ssyms[ssmax++] = k;
361 }
362 }
363 }
364 }
365 }
366 ssinit = ssmax;
367 n = 0;
368 for (s=0; s<=n; s++) {
369 dprintf("s = %d\n", s, NULL, NULL);
370 ind = 0;
371 numtrans = 0;
372 finflg = 0;
373 if (*(state[s] + *state[s]) == line) { /* s final? */
374 finflg = 1;
375 goto tenter;
376 }
377 spmax = spinit;
378 ssmax = ssinit;
379 ptr = state[s];
380 num = *ptr;
381 for (i=0; i<num; i++) {
382 curpos = *(++ptr);
383 if (iposns[curpos] != 1 && index[curpos] != 1) {
384 index[curpos] = 1;
385 sposns[spmax++] = curpos;
386 }
387 cp = point[curpos];
388 switch (type(cp)) {
389 case CHAR:
390 k = (int) right(cp);
391 if (isyms[k] == 0 && symbol[k] == 0) {
392 symbol[k] = 1;
393 ssyms[ssmax++] = k;
394 }
395 break;
396 case DOT:
397 for (k=1; k<NCHARS; k++) {
398 if (k != HAT) {
399 if (isyms[k] == 0 && symbol[k] == 0) {
400 symbol[k] = 1;
401 ssyms[ssmax++] = k;
402 }
403 }
404 }
405 break;
406 case CCL:
407 for (p = (char *) right(cp); *p; p++) {
408 if (*p != HAT) {
409 if (isyms[*p] == 0 && symbol[*p] == 0) {
410 symbol[*p] = 1;
411 ssyms[ssmax++] = *p;
412 }
413 }
414 }
415 break;
416 case NCCL:
417 for (k=1; k<NCHARS; k++) {
418 if (k != HAT && !member(k, (char *) right(cp))) {
419 if (isyms[k] == 0 && symbol[k] == 0) {
420 symbol[k] = 1;
421 ssyms[ssmax++] = k;
422 }
423 }
424 }
425 }
426 }
427 for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */
428 c = ssyms[j];
429 symbol[c] = 0;
430 setcnt = 0;
431 for (k=0; k<=line; k++) setvec[k] = 0;
432 for (i=0; i<spmax; i++) {
433 index[sposns[i]] = 0;
434 cp = point[sposns[i]];
435 if ((k = type(cp)) != FINAL)
436 if (k == CHAR && c == (int) right(cp)
437 || k == DOT
438 || k == CCL && member(c, (char *) right(cp))
439 || k == NCCL && !member(c, (char *) right(cp))) {
440 ptr = foll[sposns[i]];
441 num = *ptr;
442 for (k=0; k<num; k++) {
443 if (setvec[*(++ptr)] != 1
444 && iposns[*ptr] != 1) {
445 setvec[*ptr] = 1;
446 setcnt++;
447 }
448 }
449 }
450 } /* end nextstate */
451 if (notin(state, n, &prev)) {
452 if (n >= NSTATES) {
453 dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
454 overflo();
455 }
456 state[++n] = add(setcnt);
457 dprintf(" delta(%d,%o) = %d", s,c,n);
458 dprintf(", ind = %d\n", ind+1, NULL, NULL);
459 fatab[++ind] = c;
460 fatab[++ind] = n;
461 numtrans++;
462 }
463 else {
464 if (prev != 0) {
465 dprintf(" delta(%d,%o) = %d", s,c,prev);
466 dprintf(", ind = %d\n", ind+1, NULL, NULL);
467 fatab[++ind] = c;
468 fatab[++ind] = prev;
469 numtrans++;
470 }
471 }
472 }
473 tenter:
474 if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL)
475 overflo();
476 where[s] = pfa;
477 if (finflg)
478 pfa->cch = -1; /* s is a final state */
479 else
480 pfa->cch = numtrans;
481 pfa->st = 0;
482 for (i=1, pfa += 1; i<=numtrans; i++, pfa++) {
483 pfa->cch = fatab[2*i-1];
484 pfa->st = (struct fa *) fatab[2*i];
485 }
486 }
487 for (i=0; i<=n; i++) {
488 xfree(state[i]); /* free state[i] */
489 pfa = where[i];
490 pfa->st = where[0];
491 dprintf("state %d: (%o)\n", i, pfa, NULL);
492 dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL);
493 for (k=1; k<=pfa->cch; k++) {
494 (pfa+k)->st = where[ (int) (pfa+k)->st];
495 dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL);
496 }
497 }
498 pfa = where[0];
499 if ((num = pfa->cch) < 0)
500 return(where[0]);
501 for (pfa += num; num; num--, pfa--)
502 if (pfa->cch == HAT) {
503 return(pfa->st);
504 }
505 return(where[0]);
506}
507
508match(pfa, p)
509register struct fa *pfa;
510register char *p;
511{
512 register count;
513 char c;
514 if (p == 0) return(0);
515 if (pfa->cch == 1) { /* fast test for first character, if possible */
516 c = (++pfa)->cch;
517 do
518 if (c == *p) {
519 p++;
520 pfa = pfa->st;
521 goto adv;
522 }
523 while (*p++ != 0);
524 return(0);
525 }
526 adv: if ((count = pfa->cch) < 0) return(1);
527 do {
528 for (pfa += count; count; count--, pfa--)
529 if (pfa->cch == *p) {
530 break;
531 }
532 pfa = pfa->st;
533 if ((count = pfa->cch) < 0) return(1);
534 } while(*p++ != 0);
535 return(0);
536}