mkdir fixes
[unix-history] / usr / src / old / awk / b.c
CommitLineData
8351eafd
KM
1/* b.c 4.1 82/05/07 */
2
3#include "awk.def"
4#include "stdio.h"
5#include "awk.h"
6
7extern node *op2();
8extern struct fa *cgotofn();
9#define MAXLIN 256
10#define NCHARS 128
11#define NSTATES 256
12
13#define type(v) v->nobj
14#define left(v) v->narg[0]
15#define right(v) v->narg[1]
16#define parent(v) v->nnext
17
18#define LEAF case CCL: case NCCL: case CHAR: case DOT:
19#define UNARY case FINAL: case STAR: case PLUS: case QUEST:
20
21/* encoding in tree nodes:
22 leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value
23 unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
24 binary (CAT, OR): left and right are children
25 parent contains pointer to parent
26*/
27
28struct fa {
29 int cch;
30 struct fa *st;
31};
32
33int *state[NSTATES];
34int *foll[MAXLIN];
35char chars[MAXLIN];
36int setvec[MAXLIN];
37node *point[MAXLIN];
38
39int setcnt;
40int line;
41
42
43struct fa *makedfa(p) /* returns dfa for tree pointed to by p */
44node *p;
45{
46 node *p1;
47 struct fa *fap;
48 p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p);
49 /* put DOT STAR in front of reg. exp. */
50 p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */
51
52 line = 0;
53 penter(p1); /* enter parent pointers and leaf indices */
54 point[line] = p1; /* FINAL node */
55 setvec[0] = 1; /* for initial DOT STAR */
56 cfoll(p1); /* set up follow sets */
57 fap = cgotofn();
58 freetr(p1); /* add this when alloc works */
59 return(fap);
60}
61
62penter(p) /* set up parent pointers and leaf indices */
63node *p;
64{
65 switch(type(p)) {
66 LEAF
67 left(p) = (node *) line;
68 point[line++] = p;
69 break;
70 UNARY
71 penter(left(p));
72 parent(left(p)) = p;
73 break;
74 case CAT:
75 case OR:
76 penter(left(p));
77 penter(right(p));
78 parent(left(p)) = p;
79 parent(right(p)) = p;
80 break;
81 default:
82 error(FATAL, "unknown type %d in penter\n", type(p));
83 break;
84 }
85}
86
87freetr(p) /* free parse tree and follow sets */
88node *p;
89{
90 switch(type(p)) {
91 LEAF
92 xfree(foll[(int) left(p)]);
93 xfree(p);
94 break;
95 UNARY
96 freetr(left(p));
97 xfree(p);
98 break;
99 case CAT:
100 case OR:
101 freetr(left(p));
102 freetr(right(p));
103 xfree(p);
104 break;
105 default:
106 error(FATAL, "unknown type %d in freetr", type(p));
107 break;
108 }
109}
110char *cclenter(p)
111register char *p;
112{
113 register i, c;
114 char *op;
115
116 op = p;
117 i = 0;
118 while ((c = *p++) != 0) {
119 if (c == '-' && i > 0 && chars[i-1] != 0) {
120 if (*p != 0) {
121 c = chars[i-1];
122 while (c < *p) {
123 if (i >= MAXLIN)
124 overflo();
125 chars[i++] = ++c;
126 }
127 p++;
128 continue;
129 }
130 }
131 if (i >= MAXLIN)
132 overflo();
133 chars[i++] = c;
134 }
135 chars[i++] = '\0';
136 dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL);
137 xfree(op);
138 return(tostring(chars));
139}
140
141overflo()
142{
143 error(FATAL, "regular expression too long\n");
144}
145
146cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */
147register node *v;
148{
149 register i;
150 int prev;
151 int *add();
152
153 switch(type(v)) {
154 LEAF
155 setcnt = 0;
156 for (i=1; i<=line; i++)
157 setvec[i] = 0;
158 follow(v);
159 if (notin(foll, ( (int) left(v))-1, &prev)) {
160 foll[(int) left(v)] = add(setcnt);
161 }
162 else
163 foll[ (int) left(v)] = foll[prev];
164 break;
165 UNARY
166 cfoll(left(v));
167 break;
168 case CAT:
169 case OR:
170 cfoll(left(v));
171 cfoll(right(v));
172 break;
173 default:
174 error(FATAL, "unknown type %d in cfoll", type(v));
175 }
176}
177
178first(p) /* collects initially active leaves of p into setvec */
179register node *p; /* returns 0 or 1 depending on whether p matches empty string */
180{
181 register b;
182
183 switch(type(p)) {
184 LEAF
185 if (setvec[(int) left(p)] != 1) {
186 setvec[(int) left(p)] = 1;
187 setcnt++;
188 }
189 if (type(p) == CCL && (*(char *) right(p)) == '\0')
190 return(0); /* empty CCL */
191 else return(1);
192 case FINAL:
193 case PLUS:
194 if (first(left(p)) == 0) return(0);
195 return(1);
196 case STAR:
197 case QUEST:
198 first(left(p));
199 return(0);
200 case CAT:
201 if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
202 return(1);
203 case OR:
204 b = first(right(p));
205 if (first(left(p)) == 0 || b == 0) return(0);
206 return(1);
207 }
208 error(FATAL, "unknown type %d in first\n", type(p));
209 return(-1);
210}
211
212follow(v)
213node *v; /* collects leaves that can follow v into setvec */
214{
215 node *p;
216
217 if (type(v) == FINAL)
218 return;
219 p = parent(v);
220 switch (type(p)) {
221 case STAR:
222 case PLUS: first(v);
223 follow(p);
224 return;
225
226 case OR:
227 case QUEST: follow(p);
228 return;
229
230 case CAT: if (v == left(p)) { /* v is left child of p */
231 if (first(right(p)) == 0) {
232 follow(p);
233 return;
234 }
235 }
236 else /* v is right child */
237 follow(p);
238 return;
239 case FINAL: if (setvec[line] != 1) {
240 setvec[line] = 1;
241 setcnt++;
242 }
243 return;
244 }
245}
246
247member(c, s) /* is c in s? */
248register char c, *s;
249{
250 while (*s)
251 if (c == *s++)
252 return(1);
253 return(0);
254}
255
256notin(array, n, prev) /* is setvec in array[0] thru array[n]? */
257int **array;
258int *prev; {
259 register i, j;
260 int *ptr;
261 for (i=0; i<=n; i++) {
262 ptr = array[i];
263 if (*ptr == setcnt) {
264 for (j=0; j < setcnt; j++)
265 if (setvec[*(++ptr)] != 1) goto nxt;
266 *prev = i;
267 return(0);
268 }
269 nxt: ;
270 }
271 return(1);
272}
273
274int *add(n) { /* remember setvec */
275 int *ptr, *p;
276 register i;
277 if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL)
278 overflo();
279 *ptr = n;
280 dprintf("add(%d)\n", n, NULL, NULL);
281 for (i=1; i <= line; i++)
282 if (setvec[i] == 1) {
283 *(++ptr) = i;
284 dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
285 }
286 dprintf("\n", NULL, NULL, NULL);
287 return(p);
288}
289
290struct fa *cgotofn()
291{
292 register i, k;
293 register int *ptr;
294 char c;
295 char *p;
296 node *cp;
297 int j, n, s, ind, numtrans;
298 int finflg;
299 int curpos, num, prev;
300 struct fa *where[NSTATES];
301
302 int fatab[257];
303 struct fa *pfa;
304
305 char index[MAXLIN];
306 char iposns[MAXLIN];
307 int sposns[MAXLIN];
308 int spmax, spinit;
309
310 char symbol[NCHARS];
311 char isyms[NCHARS];
312 char ssyms[NCHARS];
313 int ssmax, ssinit;
314
315 for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0;
316 for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0;
317 setcnt = 0;
318 /* compute initial positions and symbols of state 0 */
319 ssmax = 0;
320 ptr = state[0] = foll[0];
321 spinit = *ptr;
322 for (i=0; i<spinit; i++) {
323 curpos = *(++ptr);
324 sposns[i] = curpos;
325 iposns[curpos] = 1;
326 cp = point[curpos];
327 dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
328 switch (type(cp)) {
329 case CHAR:
330 k = (int) right(cp);
331 if (isyms[k] != 1) {
332 isyms[k] = 1;
333 ssyms[ssmax++] = k;
334 }
335 break;
336 case DOT:
337 for (k=1; k<NCHARS; k++) {
338 if (k != HAT) {
339 if (isyms[k] != 1) {
340 isyms[k] = 1;
341 ssyms[ssmax++] = k;
342 }
343 }
344 }
345 break;
346 case CCL:
347 for (p = (char *) right(cp); *p; p++) {
348 if (*p != HAT) {
349 if (isyms[*p] != 1) {
350 isyms[*p] = 1;
351 ssyms[ssmax++] = *p;
352 }
353 }
354 }
355 break;
356 case NCCL:
357 for (k=1; k<NCHARS; k++) {
358 if (k != HAT && !member(k, (char *) right(cp))) {
359 if (isyms[k] != 1) {
360 isyms[k] = 1;
361 ssyms[ssmax++] = k;
362 }
363 }
364 }
365 }
366 }
367 ssinit = ssmax;
368 n = 0;
369 for (s=0; s<=n; s++) {
370 dprintf("s = %d\n", s, NULL, NULL);
371 ind = 0;
372 numtrans = 0;
373 finflg = 0;
374 if (*(state[s] + *state[s]) == line) { /* s final? */
375 finflg = 1;
376 goto tenter;
377 }
378 spmax = spinit;
379 ssmax = ssinit;
380 ptr = state[s];
381 num = *ptr;
382 for (i=0; i<num; i++) {
383 curpos = *(++ptr);
384 if (iposns[curpos] != 1 && index[curpos] != 1) {
385 index[curpos] = 1;
386 sposns[spmax++] = curpos;
387 }
388 cp = point[curpos];
389 switch (type(cp)) {
390 case CHAR:
391 k = (int) right(cp);
392 if (isyms[k] == 0 && symbol[k] == 0) {
393 symbol[k] = 1;
394 ssyms[ssmax++] = k;
395 }
396 break;
397 case DOT:
398 for (k=1; k<NCHARS; k++) {
399 if (k != HAT) {
400 if (isyms[k] == 0 && symbol[k] == 0) {
401 symbol[k] = 1;
402 ssyms[ssmax++] = k;
403 }
404 }
405 }
406 break;
407 case CCL:
408 for (p = (char *) right(cp); *p; p++) {
409 if (*p != HAT) {
410 if (isyms[*p] == 0 && symbol[*p] == 0) {
411 symbol[*p] = 1;
412 ssyms[ssmax++] = *p;
413 }
414 }
415 }
416 break;
417 case NCCL:
418 for (k=1; k<NCHARS; k++) {
419 if (k != HAT && !member(k, (char *) right(cp))) {
420 if (isyms[k] == 0 && symbol[k] == 0) {
421 symbol[k] = 1;
422 ssyms[ssmax++] = k;
423 }
424 }
425 }
426 }
427 }
428 for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */
429 c = ssyms[j];
430 symbol[c] = 0;
431 setcnt = 0;
432 for (k=0; k<=line; k++) setvec[k] = 0;
433 for (i=0; i<spmax; i++) {
434 index[sposns[i]] = 0;
435 cp = point[sposns[i]];
436 if ((k = type(cp)) != FINAL)
437 if (k == CHAR && c == (int) right(cp)
438 || k == DOT
439 || k == CCL && member(c, (char *) right(cp))
440 || k == NCCL && !member(c, (char *) right(cp))) {
441 ptr = foll[sposns[i]];
442 num = *ptr;
443 for (k=0; k<num; k++) {
444 if (setvec[*(++ptr)] != 1
445 && iposns[*ptr] != 1) {
446 setvec[*ptr] = 1;
447 setcnt++;
448 }
449 }
450 }
451 } /* end nextstate */
452 if (notin(state, n, &prev)) {
453 if (n >= NSTATES) {
454 dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
455 overflo();
456 }
457 state[++n] = add(setcnt);
458 dprintf(" delta(%d,%o) = %d", s,c,n);
459 dprintf(", ind = %d\n", ind+1, NULL, NULL);
460 fatab[++ind] = c;
461 fatab[++ind] = n;
462 numtrans++;
463 }
464 else {
465 if (prev != 0) {
466 dprintf(" delta(%d,%o) = %d", s,c,prev);
467 dprintf(", ind = %d\n", ind+1, NULL, NULL);
468 fatab[++ind] = c;
469 fatab[++ind] = prev;
470 numtrans++;
471 }
472 }
473 }
474 tenter:
475 if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL)
476 overflo();
477 where[s] = pfa;
478 if (finflg)
479 pfa->cch = -1; /* s is a final state */
480 else
481 pfa->cch = numtrans;
482 pfa->st = 0;
483 for (i=1, pfa += 1; i<=numtrans; i++, pfa++) {
484 pfa->cch = fatab[2*i-1];
485 pfa->st = (struct fa *) fatab[2*i];
486 }
487 }
488 for (i=0; i<=n; i++) {
489 xfree(state[i]); /* free state[i] */
490 pfa = where[i];
491 pfa->st = where[0];
492 dprintf("state %d: (%o)\n", i, pfa, NULL);
493 dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL);
494 for (k=1; k<=pfa->cch; k++) {
495 (pfa+k)->st = where[ (int) (pfa+k)->st];
496 dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL);
497 }
498 }
499 pfa = where[0];
500 if ((num = pfa->cch) < 0)
501 return(where[0]);
502 for (pfa += num; num; num--, pfa--)
503 if (pfa->cch == HAT) {
504 return(pfa->st);
505 }
506 return(where[0]);
507}
508
509match(pfa, p)
510register struct fa *pfa;
511register char *p;
512{
513 register count;
514 char c;
515 if (p == 0) return(0);
516 if (pfa->cch == 1) { /* fast test for first character, if possible */
517 c = (++pfa)->cch;
518 do
519 if (c == *p) {
520 p++;
521 pfa = pfa->st;
522 goto adv;
523 }
524 while (*p++ != 0);
525 return(0);
526 }
527 adv: if ((count = pfa->cch) < 0) return(1);
528 do {
529 for (pfa += count; count; count--, pfa--)
530 if (pfa->cch == *p) {
531 break;
532 }
533 pfa = pfa->st;
534 if ((count = pfa->cch) < 0) return(1);
535 } while(*p++ != 0);
536 return(0);
537}