made minimum granularity of vertical movement a parameter read from DESC file
[unix-history] / usr / src / old / awk / b.c
CommitLineData
b803c326
SL
1#ifndef lint
2static char sccsid[] = "@(#)b.c 4.2 %G%";
3#endif
8351eafd
KM
4
5#include "awk.def"
6#include "stdio.h"
7#include "awk.h"
8
9extern node *op2();
10extern struct fa *cgotofn();
11#define MAXLIN 256
12#define NCHARS 128
13#define NSTATES 256
14
15#define type(v) v->nobj
16#define left(v) v->narg[0]
17#define right(v) v->narg[1]
18#define parent(v) v->nnext
19
20#define LEAF case CCL: case NCCL: case CHAR: case DOT:
21#define UNARY case FINAL: case STAR: case PLUS: case QUEST:
22
23/* encoding in tree nodes:
24 leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value
25 unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
26 binary (CAT, OR): left and right are children
27 parent contains pointer to parent
28*/
29
30struct fa {
31 int cch;
32 struct fa *st;
33};
34
35int *state[NSTATES];
36int *foll[MAXLIN];
37char chars[MAXLIN];
38int setvec[MAXLIN];
39node *point[MAXLIN];
40
41int setcnt;
42int line;
43
44
45struct fa *makedfa(p) /* returns dfa for tree pointed to by p */
46node *p;
47{
48 node *p1;
49 struct fa *fap;
50 p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p);
51 /* put DOT STAR in front of reg. exp. */
52 p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */
53
54 line = 0;
55 penter(p1); /* enter parent pointers and leaf indices */
56 point[line] = p1; /* FINAL node */
57 setvec[0] = 1; /* for initial DOT STAR */
58 cfoll(p1); /* set up follow sets */
59 fap = cgotofn();
60 freetr(p1); /* add this when alloc works */
61 return(fap);
62}
63
64penter(p) /* set up parent pointers and leaf indices */
65node *p;
66{
67 switch(type(p)) {
68 LEAF
69 left(p) = (node *) line;
70 point[line++] = p;
71 break;
72 UNARY
73 penter(left(p));
74 parent(left(p)) = p;
75 break;
76 case CAT:
77 case OR:
78 penter(left(p));
79 penter(right(p));
80 parent(left(p)) = p;
81 parent(right(p)) = p;
82 break;
83 default:
84 error(FATAL, "unknown type %d in penter\n", type(p));
85 break;
86 }
87}
88
89freetr(p) /* free parse tree and follow sets */
90node *p;
91{
92 switch(type(p)) {
93 LEAF
94 xfree(foll[(int) left(p)]);
95 xfree(p);
96 break;
97 UNARY
98 freetr(left(p));
99 xfree(p);
100 break;
101 case CAT:
102 case OR:
103 freetr(left(p));
104 freetr(right(p));
105 xfree(p);
106 break;
107 default:
108 error(FATAL, "unknown type %d in freetr", type(p));
109 break;
110 }
111}
112char *cclenter(p)
113register char *p;
114{
115 register i, c;
116 char *op;
117
118 op = p;
119 i = 0;
120 while ((c = *p++) != 0) {
121 if (c == '-' && i > 0 && chars[i-1] != 0) {
122 if (*p != 0) {
123 c = chars[i-1];
124 while (c < *p) {
125 if (i >= MAXLIN)
126 overflo();
127 chars[i++] = ++c;
128 }
129 p++;
130 continue;
131 }
132 }
133 if (i >= MAXLIN)
134 overflo();
135 chars[i++] = c;
136 }
137 chars[i++] = '\0';
138 dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL);
139 xfree(op);
140 return(tostring(chars));
141}
142
143overflo()
144{
145 error(FATAL, "regular expression too long\n");
146}
147
148cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */
149register node *v;
150{
151 register i;
152 int prev;
153 int *add();
154
155 switch(type(v)) {
156 LEAF
157 setcnt = 0;
158 for (i=1; i<=line; i++)
159 setvec[i] = 0;
160 follow(v);
161 if (notin(foll, ( (int) left(v))-1, &prev)) {
162 foll[(int) left(v)] = add(setcnt);
163 }
164 else
165 foll[ (int) left(v)] = foll[prev];
166 break;
167 UNARY
168 cfoll(left(v));
169 break;
170 case CAT:
171 case OR:
172 cfoll(left(v));
173 cfoll(right(v));
174 break;
175 default:
176 error(FATAL, "unknown type %d in cfoll", type(v));
177 }
178}
179
180first(p) /* collects initially active leaves of p into setvec */
181register node *p; /* returns 0 or 1 depending on whether p matches empty string */
182{
183 register b;
184
185 switch(type(p)) {
186 LEAF
187 if (setvec[(int) left(p)] != 1) {
188 setvec[(int) left(p)] = 1;
189 setcnt++;
190 }
191 if (type(p) == CCL && (*(char *) right(p)) == '\0')
192 return(0); /* empty CCL */
193 else return(1);
194 case FINAL:
195 case PLUS:
196 if (first(left(p)) == 0) return(0);
197 return(1);
198 case STAR:
199 case QUEST:
200 first(left(p));
201 return(0);
202 case CAT:
203 if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
204 return(1);
205 case OR:
206 b = first(right(p));
207 if (first(left(p)) == 0 || b == 0) return(0);
208 return(1);
209 }
210 error(FATAL, "unknown type %d in first\n", type(p));
211 return(-1);
212}
213
214follow(v)
215node *v; /* collects leaves that can follow v into setvec */
216{
217 node *p;
218
219 if (type(v) == FINAL)
220 return;
221 p = parent(v);
222 switch (type(p)) {
223 case STAR:
224 case PLUS: first(v);
225 follow(p);
226 return;
227
228 case OR:
229 case QUEST: follow(p);
230 return;
231
232 case CAT: if (v == left(p)) { /* v is left child of p */
233 if (first(right(p)) == 0) {
234 follow(p);
235 return;
236 }
237 }
238 else /* v is right child */
239 follow(p);
240 return;
241 case FINAL: if (setvec[line] != 1) {
242 setvec[line] = 1;
243 setcnt++;
244 }
245 return;
246 }
247}
248
249member(c, s) /* is c in s? */
250register char c, *s;
251{
252 while (*s)
253 if (c == *s++)
254 return(1);
255 return(0);
256}
257
258notin(array, n, prev) /* is setvec in array[0] thru array[n]? */
259int **array;
260int *prev; {
261 register i, j;
262 int *ptr;
263 for (i=0; i<=n; i++) {
264 ptr = array[i];
265 if (*ptr == setcnt) {
266 for (j=0; j < setcnt; j++)
267 if (setvec[*(++ptr)] != 1) goto nxt;
268 *prev = i;
269 return(0);
270 }
271 nxt: ;
272 }
273 return(1);
274}
275
276int *add(n) { /* remember setvec */
277 int *ptr, *p;
278 register i;
279 if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL)
280 overflo();
281 *ptr = n;
282 dprintf("add(%d)\n", n, NULL, NULL);
283 for (i=1; i <= line; i++)
284 if (setvec[i] == 1) {
285 *(++ptr) = i;
286 dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
287 }
288 dprintf("\n", NULL, NULL, NULL);
289 return(p);
290}
291
292struct fa *cgotofn()
293{
294 register i, k;
295 register int *ptr;
296 char c;
297 char *p;
298 node *cp;
299 int j, n, s, ind, numtrans;
300 int finflg;
301 int curpos, num, prev;
302 struct fa *where[NSTATES];
303
304 int fatab[257];
305 struct fa *pfa;
306
307 char index[MAXLIN];
308 char iposns[MAXLIN];
309 int sposns[MAXLIN];
310 int spmax, spinit;
311
312 char symbol[NCHARS];
313 char isyms[NCHARS];
314 char ssyms[NCHARS];
315 int ssmax, ssinit;
316
317 for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0;
318 for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0;
319 setcnt = 0;
320 /* compute initial positions and symbols of state 0 */
321 ssmax = 0;
322 ptr = state[0] = foll[0];
323 spinit = *ptr;
324 for (i=0; i<spinit; i++) {
325 curpos = *(++ptr);
326 sposns[i] = curpos;
327 iposns[curpos] = 1;
328 cp = point[curpos];
329 dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
330 switch (type(cp)) {
331 case CHAR:
332 k = (int) right(cp);
333 if (isyms[k] != 1) {
334 isyms[k] = 1;
335 ssyms[ssmax++] = k;
336 }
337 break;
338 case DOT:
339 for (k=1; k<NCHARS; k++) {
340 if (k != HAT) {
341 if (isyms[k] != 1) {
342 isyms[k] = 1;
343 ssyms[ssmax++] = k;
344 }
345 }
346 }
347 break;
348 case CCL:
349 for (p = (char *) right(cp); *p; p++) {
350 if (*p != HAT) {
351 if (isyms[*p] != 1) {
352 isyms[*p] = 1;
353 ssyms[ssmax++] = *p;
354 }
355 }
356 }
357 break;
358 case NCCL:
359 for (k=1; k<NCHARS; k++) {
360 if (k != HAT && !member(k, (char *) right(cp))) {
361 if (isyms[k] != 1) {
362 isyms[k] = 1;
363 ssyms[ssmax++] = k;
364 }
365 }
366 }
367 }
368 }
369 ssinit = ssmax;
370 n = 0;
371 for (s=0; s<=n; s++) {
372 dprintf("s = %d\n", s, NULL, NULL);
373 ind = 0;
374 numtrans = 0;
375 finflg = 0;
376 if (*(state[s] + *state[s]) == line) { /* s final? */
377 finflg = 1;
378 goto tenter;
379 }
380 spmax = spinit;
381 ssmax = ssinit;
382 ptr = state[s];
383 num = *ptr;
384 for (i=0; i<num; i++) {
385 curpos = *(++ptr);
386 if (iposns[curpos] != 1 && index[curpos] != 1) {
387 index[curpos] = 1;
388 sposns[spmax++] = curpos;
389 }
390 cp = point[curpos];
391 switch (type(cp)) {
392 case CHAR:
393 k = (int) right(cp);
394 if (isyms[k] == 0 && symbol[k] == 0) {
395 symbol[k] = 1;
396 ssyms[ssmax++] = k;
397 }
398 break;
399 case DOT:
400 for (k=1; k<NCHARS; k++) {
401 if (k != HAT) {
402 if (isyms[k] == 0 && symbol[k] == 0) {
403 symbol[k] = 1;
404 ssyms[ssmax++] = k;
405 }
406 }
407 }
408 break;
409 case CCL:
410 for (p = (char *) right(cp); *p; p++) {
411 if (*p != HAT) {
412 if (isyms[*p] == 0 && symbol[*p] == 0) {
413 symbol[*p] = 1;
414 ssyms[ssmax++] = *p;
415 }
416 }
417 }
418 break;
419 case NCCL:
420 for (k=1; k<NCHARS; k++) {
421 if (k != HAT && !member(k, (char *) right(cp))) {
422 if (isyms[k] == 0 && symbol[k] == 0) {
423 symbol[k] = 1;
424 ssyms[ssmax++] = k;
425 }
426 }
427 }
428 }
429 }
430 for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */
431 c = ssyms[j];
432 symbol[c] = 0;
433 setcnt = 0;
434 for (k=0; k<=line; k++) setvec[k] = 0;
435 for (i=0; i<spmax; i++) {
436 index[sposns[i]] = 0;
437 cp = point[sposns[i]];
438 if ((k = type(cp)) != FINAL)
439 if (k == CHAR && c == (int) right(cp)
440 || k == DOT
441 || k == CCL && member(c, (char *) right(cp))
442 || k == NCCL && !member(c, (char *) right(cp))) {
443 ptr = foll[sposns[i]];
444 num = *ptr;
445 for (k=0; k<num; k++) {
446 if (setvec[*(++ptr)] != 1
447 && iposns[*ptr] != 1) {
448 setvec[*ptr] = 1;
449 setcnt++;
450 }
451 }
452 }
453 } /* end nextstate */
454 if (notin(state, n, &prev)) {
455 if (n >= NSTATES) {
456 dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
457 overflo();
458 }
459 state[++n] = add(setcnt);
460 dprintf(" delta(%d,%o) = %d", s,c,n);
461 dprintf(", ind = %d\n", ind+1, NULL, NULL);
462 fatab[++ind] = c;
463 fatab[++ind] = n;
464 numtrans++;
465 }
466 else {
467 if (prev != 0) {
468 dprintf(" delta(%d,%o) = %d", s,c,prev);
469 dprintf(", ind = %d\n", ind+1, NULL, NULL);
470 fatab[++ind] = c;
471 fatab[++ind] = prev;
472 numtrans++;
473 }
474 }
475 }
476 tenter:
477 if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL)
478 overflo();
479 where[s] = pfa;
480 if (finflg)
481 pfa->cch = -1; /* s is a final state */
482 else
483 pfa->cch = numtrans;
484 pfa->st = 0;
485 for (i=1, pfa += 1; i<=numtrans; i++, pfa++) {
486 pfa->cch = fatab[2*i-1];
487 pfa->st = (struct fa *) fatab[2*i];
488 }
489 }
490 for (i=0; i<=n; i++) {
491 xfree(state[i]); /* free state[i] */
492 pfa = where[i];
493 pfa->st = where[0];
494 dprintf("state %d: (%o)\n", i, pfa, NULL);
495 dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL);
496 for (k=1; k<=pfa->cch; k++) {
497 (pfa+k)->st = where[ (int) (pfa+k)->st];
498 dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL);
499 }
500 }
501 pfa = where[0];
502 if ((num = pfa->cch) < 0)
503 return(where[0]);
504 for (pfa += num; num; num--, pfa--)
505 if (pfa->cch == HAT) {
506 return(pfa->st);
507 }
508 return(where[0]);
509}
510
511match(pfa, p)
512register struct fa *pfa;
513register char *p;
514{
515 register count;
516 char c;
517 if (p == 0) return(0);
518 if (pfa->cch == 1) { /* fast test for first character, if possible */
519 c = (++pfa)->cch;
520 do
521 if (c == *p) {
522 p++;
523 pfa = pfa->st;
524 goto adv;
525 }
526 while (*p++ != 0);
527 return(0);
528 }
529 adv: if ((count = pfa->cch) < 0) return(1);
530 do {
531 for (pfa += count; count; count--, pfa--)
532 if (pfa->cch == *p) {
533 break;
534 }
535 pfa = pfa->st;
536 if ((count = pfa->cch) < 0) return(1);
537 } while(*p++ != 0);
538 return(0);
539}