Commit | Line | Data |
---|---|---|
2958d0fe | 1 | #ifndef lint |
435e8dff | 2 | static char sccsid[] = "@(#)dprog.c 4.3 (Berkeley) 89/05/11"; |
2958d0fe RH |
3 | #endif not lint |
4 | ||
5 | /* | |
280efb10 | 6 | * diction -- print all sentences containing one of the default phrases |
2958d0fe RH |
7 | * |
8 | * status returns: | |
9 | * 0 - ok, and some matches | |
10 | * 1 - ok, but no matches | |
11 | * 2 - some error | |
12 | */ | |
13 | ||
14 | #include <stdio.h> | |
15 | #include <ctype.h> | |
435e8dff | 16 | #include "pathnames.h" |
2958d0fe RH |
17 | |
18 | #define MAXSIZ 6500 | |
19 | #define QSIZE 650 | |
280efb10 RH |
20 | int linemsg; |
21 | long olcount; | |
22 | long lcount; | |
2958d0fe RH |
/*
 * One node of the phrase-matching automaton.  Nodes that are alternatives
 * at the same position of a phrase are chained through `link'; following a
 * matching character moves to `nst'.
 */
struct words {
	char inp;		/* character that leads from this node to nst; 0 = unused */
	char out;		/* nonzero on a node ending a phrase: phrase length - 1,
				 * or 0377 for an entry marked with '~' */
	struct words *nst;	/* next state after matching inp */
	struct words *link;	/* next alternative at this position */
	struct words *fail;	/* fallback state, filled in by cfail() */
};

struct words w[MAXSIZ];		/* node pool; w[0] is the start state */
struct words *smax;		/* highest node handed out so far */
struct words *q;		/* scratch pointer used while computing fail links */
30 | ||
280efb10 RH |
/*
 * table -- character class for every 7-bit input byte.
 *	letters fold to lower case, digits map to themselves,
 *	'!', '.' and '?' map to '.',
 *	newline and the remaining printable characters map to ' ',
 *	every other control character (and DEL) maps to 0 or ' ' as listed.
 */
char table[128] = {
	/* 000-017 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ' ', 0, 0, 0, 0, 0,
	/* 020-037 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 040-057 */
	' ', '.', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '.', ' ',
	/* 060-077 */
	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ' ', ' ', ' ', ' ', ' ', '.',
	/* 100-117 */
	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/* 120-137 */
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ',
	/* 140-157 */
	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/* 160-177 */
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' '
};
49 | int caps = 0; | |
50 | int lineno = 0; | |
2958d0fe RH |
51 | int fflag; |
52 | int nflag = 1; /*use default file*/ | |
53 | char *filename; | |
280efb10 | 54 | int mflg = 0; /*don't catch output*/ |
2958d0fe RH |
55 | int nfile; |
56 | int nsucc; | |
57 | long nsent = 0; | |
58 | long nhits = 0; | |
59 | char *nlp; | |
60 | char *begp, *endp; | |
280efb10 RH |
61 | int beg, last; |
62 | char *myst; | |
63 | int myct = 0; | |
2958d0fe RH |
64 | int oct = 0; |
65 | FILE *wordf; | |
280efb10 | 66 | FILE *mine; |
2958d0fe | 67 | char *argptr; |
280efb10 RH |
68 | long tl = 0; |
69 | long th = 0; | |
2958d0fe RH |
70 | |
71 | main(argc, argv) | |
280efb10 | 72 | char *argv[]; |
2958d0fe | 73 | { |
280efb10 | 74 | int sv; |
2958d0fe RH |
75 | while (--argc > 0 && (++argv)[0][0]=='-') |
76 | switch (argv[0][1]) { | |
77 | ||
78 | case 'f': | |
79 | fflag++; | |
280efb10 | 80 | filename = (++argv)[0]; |
2958d0fe RH |
81 | argc--; |
82 | continue; | |
83 | ||
84 | case 'n': | |
85 | nflag = 0; | |
86 | continue; | |
87 | case 'd': | |
280efb10 RH |
88 | mflg=0; |
89 | continue; | |
90 | case 'c': | |
91 | caps++; | |
92 | continue; | |
93 | case 'l': | |
94 | lineno++; | |
2958d0fe RH |
95 | continue; |
96 | default: | |
97 | fprintf(stderr, "diction: unknown flag\n"); | |
98 | continue; | |
99 | } | |
100 | out: | |
101 | if(nflag){ | |
435e8dff | 102 | wordf = fopen(_PATH_DICT,"r"); |
2958d0fe RH |
103 | if(wordf == NULL){ |
104 | fprintf(stderr,"diction: can't open default dictionary\n"); | |
105 | exit(2); | |
106 | } | |
107 | } | |
108 | else { | |
280efb10 | 109 | wordf = fopen(filename,"r"); |
2958d0fe RH |
110 | if(wordf == NULL){ |
111 | fprintf(stderr,"diction: can't open %s\n",filename); | |
112 | exit(2); | |
113 | } | |
114 | } | |
115 | ||
280efb10 RH |
116 | #ifdef CATCH |
117 | if(fopen(CATCH,"r") != NULL) | |
118 | if((mine=fopen(CATCH,"a"))!=NULL)mflg=1; | |
119 | #endif | |
120 | #ifdef MACS | |
121 | if(caps){ | |
122 | printf(".so "); | |
123 | printf(MACS); | |
124 | printf("\n"); | |
125 | } | |
126 | #endif | |
2958d0fe RH |
127 | cgotofn(); |
128 | cfail(); | |
129 | nfile = argc; | |
130 | if (argc<=0) { | |
131 | execute((char *)NULL); | |
132 | } | |
133 | else while (--argc >= 0) { | |
134 | execute(*argv); | |
280efb10 RH |
135 | if(lineno){ |
136 | printf("file %s: number of lines %ld number of phrases found %ld\n", | |
137 | *argv, lcount-1, nhits); | |
138 | tl += lcount-1; | |
139 | th += nhits; | |
140 | sv = lcount-1; | |
141 | lcount = nhits = 0; | |
142 | } | |
2958d0fe RH |
143 | argv++; |
144 | } | |
280efb10 RH |
145 | if(mflg)fprintf(mine,"number of sentences %ld %ld number of hits %ld %ld\n",nsent,tl,nhits,th); |
146 | if(!caps&& !lineno)printf("number of sentences %ld number of phrases found %ld\n",nsent,nhits); | |
147 | else if(tl != sv) | |
148 | if(!caps)printf("totals: number of lines %ld number of phrases found %ld\n",tl,th); | |
2958d0fe RH |
149 | exit(nsucc == 0); |
150 | } | |
151 | ||
152 | execute(file) | |
153 | char *file; | |
154 | { | |
155 | register char *p; | |
156 | register struct words *c; | |
157 | register ccount; | |
280efb10 RH |
158 | int count1; |
159 | char *beg1; | |
2958d0fe RH |
160 | struct words *savc; |
161 | char *savp; | |
162 | int savct; | |
163 | int scr; | |
164 | char buf[1024]; | |
165 | int f; | |
166 | int hit; | |
280efb10 | 167 | last = 0; |
2958d0fe RH |
168 | if (file) { |
169 | if ((f = open(file, 0)) < 0) { | |
170 | fprintf(stderr, "diction: can't open %s\n", file); | |
171 | exit(2); | |
172 | } | |
173 | } | |
174 | else f = 0; | |
280efb10 RH |
175 | lcount = olcount = 1; |
176 | linemsg = 1; | |
2958d0fe | 177 | ccount = 0; |
280efb10 | 178 | count1 = -1; |
2958d0fe RH |
179 | p = buf; |
180 | nlp = p; | |
181 | c = w; | |
182 | oct = hit = 0; | |
280efb10 RH |
183 | savc = (struct words *) 0; |
184 | savp = (char *) 0; | |
2958d0fe | 185 | for (;;) { |
280efb10 | 186 | if(--ccount <= 0) { |
2958d0fe RH |
187 | if (p == &buf[1024]) p = buf; |
188 | if (p > &buf[512]) { | |
189 | if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; | |
190 | } | |
191 | else if ((ccount = read(f, p, 512)) <= 0) break; | |
280efb10 RH |
192 | if(caps && (count1 > 0)) |
193 | fwrite(beg1,sizeof(*beg1),count1,stdout); | |
194 | count1 = ccount; | |
195 | beg1 = p; | |
2958d0fe RH |
196 | } |
197 | if(p == &buf[1024])p=buf; | |
198 | nstate: | |
280efb10 | 199 | if (c->inp == table[*p]) { |
2958d0fe RH |
200 | c = c->nst; |
201 | } | |
202 | else if (c->link != 0) { | |
203 | c = c->link; | |
204 | goto nstate; | |
205 | } | |
206 | else { | |
207 | if(savp != 0){ | |
208 | c=savc; | |
209 | p=savp; | |
210 | if(ccount > savct)ccount += savct; | |
211 | else ccount = savct; | |
280efb10 RH |
212 | savc = (struct words *) 0; |
213 | savp = (char *) 0; | |
2958d0fe RH |
214 | goto hadone; |
215 | } | |
216 | c = c->fail; | |
217 | if (c==0) { | |
218 | c = w; | |
219 | istate: | |
280efb10 | 220 | if (c->inp == table[*p]) { |
2958d0fe RH |
221 | c = c->nst; |
222 | } | |
223 | else if (c->link != 0) { | |
224 | c = c->link; | |
225 | goto istate; | |
226 | } | |
227 | } | |
228 | else goto nstate; | |
229 | } | |
230 | if(c->out){ | |
280efb10 | 231 | if((c->inp == table[*(p+1)]) && (c->nst != 0)){ |
2958d0fe RH |
232 | savp=p; |
233 | savc=c; | |
234 | savct=ccount; | |
235 | goto cont; | |
236 | } | |
237 | else if(c->link != 0){ | |
238 | savc=c; | |
239 | while((savc=savc->link)!= 0){ | |
280efb10 | 240 | if(savc->inp == table[*(p+1)]){ |
2958d0fe RH |
241 | savp=p; |
242 | savc=c; | |
243 | savct=ccount; | |
244 | goto cont; | |
245 | } | |
246 | } | |
247 | } | |
248 | hadone: | |
280efb10 RH |
249 | savc = (struct words *) 0; |
250 | savp = (char *) 0; | |
2958d0fe RH |
251 | if(c->out == (char)(0377)){ |
252 | c=w; | |
253 | goto nstate; | |
254 | } | |
255 | begp = p - (c->out); | |
256 | if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp); | |
257 | endp=p; | |
280efb10 RH |
258 | if(mflg){ |
259 | if(begp-20 < &buf[0]){ | |
260 | myst = &buf[1024]-20; | |
261 | if(nlp < &buf[512])myst=nlp; | |
262 | } | |
263 | else myst = begp-20; | |
264 | if(myst < nlp)myst = nlp; | |
265 | beg = 0; | |
266 | } | |
2958d0fe RH |
267 | hit = 1; |
268 | nhits++; | |
280efb10 RH |
269 | if(*p == '\n')lcount++; |
270 | if (table[*p++] == '.') { | |
271 | linemsg = 1; | |
2958d0fe RH |
272 | if (--ccount <= 0) { |
273 | if (p == &buf[1024]) p = buf; | |
274 | if (p > &buf[512]) { | |
275 | if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; | |
276 | } | |
277 | else if ((ccount = read(f, p, 512)) <= 0) break; | |
280efb10 RH |
278 | if(caps && (count1 > 0)) |
279 | fwrite(beg1,sizeof(*beg1),count1,stdout); | |
280 | count1=ccount; | |
281 | beg1=p; | |
2958d0fe RH |
282 | } |
283 | } | |
284 | succeed: nsucc = 1; | |
285 | { | |
286 | if (p <= nlp) { | |
280efb10 | 287 | outc(&buf[1024],file); |
2958d0fe RH |
288 | nlp = buf; |
289 | } | |
280efb10 | 290 | outc(p,file); |
2958d0fe | 291 | } |
280efb10 | 292 | if(mflg)last=1; |
2958d0fe RH |
293 | nomatch: |
294 | nlp = p; | |
295 | c = w; | |
296 | begp = endp = 0; | |
297 | continue; | |
298 | } | |
299 | cont: | |
280efb10 RH |
300 | if(*p == '\n')lcount++; |
301 | if (table[*p++] == '.'){ | |
2958d0fe RH |
302 | if(hit){ |
303 | if(p <= nlp){ | |
280efb10 | 304 | outc(&buf[1024],file); |
2958d0fe RH |
305 | nlp = buf; |
306 | } | |
280efb10 RH |
307 | outc(p,file); |
308 | if(!caps)printf("\n\n"); | |
309 | if(mflg && last){putc('\n',mine);myct = 0;} | |
2958d0fe | 310 | } |
280efb10 RH |
311 | linemsg = 1; |
312 | if(*p == '\n')olcount = lcount+1; | |
313 | else | |
314 | olcount=lcount; | |
315 | last = 0; | |
2958d0fe RH |
316 | hit = 0; |
317 | oct = 0; | |
318 | nlp = p; | |
319 | c = w; | |
320 | begp = endp = 0; | |
280efb10 | 321 | nsent++; |
2958d0fe RH |
322 | } |
323 | } | |
280efb10 RH |
324 | if(caps && (count1 > 0)) |
325 | fwrite(beg1,sizeof(*beg1),count1,stdout); | |
2958d0fe RH |
326 | close(f); |
327 | } | |
328 | ||
329 | getargc() | |
330 | { | |
331 | register c; | |
332 | if (wordf){ | |
333 | if((c=getc(wordf))==EOF){ | |
334 | fclose(wordf); | |
335 | if(nflag && fflag){ | |
336 | nflag=0; | |
280efb10 | 337 | wordf=fopen(filename,"r"); |
2958d0fe | 338 | if(wordf == NULL){ |
280efb10 | 339 | fprintf("diction can't open %s\n",filename); |
2958d0fe RH |
340 | exit(2); |
341 | } | |
342 | return(getc(wordf)); | |
343 | } | |
344 | else return(EOF); | |
345 | } | |
346 | else return(c); | |
347 | } | |
348 | if ((c = *argptr++) == '\0') | |
349 | return(EOF); | |
350 | return(c); | |
351 | } | |
352 | ||
353 | cgotofn() { | |
354 | register c; | |
355 | register struct words *s; | |
356 | register ct; | |
357 | int neg; | |
358 | ||
359 | s = smax = w; | |
360 | neg = ct = 0; | |
361 | nword: for(;;) { | |
362 | c = getargc(); | |
363 | if(c == '~'){ | |
364 | neg++; | |
365 | c = getargc(); | |
366 | } | |
367 | if (c==EOF) | |
368 | return; | |
369 | if (c == '\n') { | |
370 | if(neg)s->out = 0377; | |
371 | else s->out = ct-1; | |
372 | neg = ct = 0; | |
373 | s = w; | |
374 | } else { | |
375 | loop: if (s->inp == c) { | |
376 | s = s->nst; | |
377 | ct++; | |
378 | continue; | |
379 | } | |
380 | if (s->inp == 0) goto enter; | |
381 | if (s->link == 0) { | |
382 | if (smax >= &w[MAXSIZ - 1]) overflo(); | |
383 | s->link = ++smax; | |
384 | s = smax; | |
385 | goto enter; | |
386 | } | |
387 | s = s->link; | |
388 | goto loop; | |
389 | } | |
390 | } | |
391 | ||
392 | enter: | |
393 | do { | |
394 | s->inp = c; | |
395 | ct++; | |
396 | if (smax >= &w[MAXSIZ - 1]) overflo(); | |
397 | s->nst = ++smax; | |
398 | s = smax; | |
399 | } while ((c = getargc()) != '\n' && c!=EOF); | |
400 | if(neg)smax->out = 0377; | |
401 | else smax->out = ct-1; | |
402 | neg = ct = 0; | |
403 | s = w; | |
404 | if (c != EOF) | |
405 | goto nword; | |
406 | } | |
407 | ||
/* overflo -- report that a fixed-size table filled up and exit with status 2. */
overflo() {
	fputs("wordlist too large\n", stderr);
	exit(2);
}
412 | cfail() { | |
413 | struct words *queue[QSIZE]; | |
414 | struct words **front, **rear; | |
415 | struct words *state; | |
416 | int bstart; | |
417 | register char c; | |
418 | register struct words *s; | |
419 | s = w; | |
420 | front = rear = queue; | |
421 | init: if ((s->inp) != 0) { | |
422 | *rear++ = s->nst; | |
423 | if (rear >= &queue[QSIZE - 1]) overflo(); | |
424 | } | |
425 | if ((s = s->link) != 0) { | |
426 | goto init; | |
427 | } | |
428 | ||
429 | while (rear!=front) { | |
430 | s = *front; | |
431 | if (front == &queue[QSIZE-1]) | |
432 | front = queue; | |
433 | else front++; | |
434 | cloop: if ((c = s->inp) != 0) { | |
435 | bstart=0; | |
436 | *rear = (q = s->nst); | |
437 | if (front < rear) | |
438 | if (rear >= &queue[QSIZE-1]) | |
439 | if (front == queue) overflo(); | |
440 | else rear = queue; | |
441 | else rear++; | |
442 | else | |
443 | if (++rear == front) overflo(); | |
444 | state = s->fail; | |
445 | floop: if (state == 0){ state = w;bstart=1;} | |
446 | if (state->inp == c) { | |
447 | qloop: q->fail = state->nst; | |
448 | if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out; | |
449 | if((q=q->link) != 0)goto qloop; | |
450 | } | |
451 | else if ((state = state->link) != 0) | |
452 | goto floop; | |
453 | else if(bstart==0){state=0; goto floop;} | |
454 | } | |
455 | if ((s = s->link) != 0) | |
456 | goto cloop; | |
457 | } | |
458 | /* for(s=w;s<=smax;s++) | |
459 | printf("s %d ch %c out %d nst %d link %d fail %d\n",s, | |
460 | s->inp,s->out,s->nst,s->link,s->fail); | |
461 | */ | |
462 | } | |
280efb10 | 463 | outc(addr,file) |
2958d0fe | 464 | char *addr; |
280efb10 | 465 | char *file; |
2958d0fe | 466 | { |
280efb10 | 467 | int inside; |
2958d0fe | 468 | |
280efb10 RH |
469 | inside = 0; |
470 | if(!caps && lineno && linemsg){ | |
471 | printf("beginning line %ld",olcount); | |
472 | if(file != (char *)NULL)printf(" %s\n",file); | |
473 | else printf("\n"); | |
474 | linemsg = 0; | |
475 | } | |
2958d0fe | 476 | while(nlp < addr){ |
280efb10 | 477 | if(!caps && oct > 60 && table[*nlp] == ' ' && nlp != begp && nlp != endp){ |
2958d0fe RH |
478 | oct=0; |
479 | putchar('\n'); | |
480 | } | |
481 | if(nlp == begp){ | |
280efb10 RH |
482 | if(caps)inside++; |
483 | else { | |
484 | if( oct >45){putchar('\n'); | |
485 | oct=0; | |
486 | } | |
487 | if( oct==0 || table[*nlp] != ' '){ | |
488 | printf("*["); | |
489 | oct+=2; | |
490 | } | |
491 | else {printf(" *[");; | |
492 | oct+=3; | |
493 | } | |
494 | } | |
495 | if(mflg)putc('[',mine); | |
496 | } | |
497 | if(inside){ | |
498 | if(islower(*nlp))*nlp = toupper(*nlp); | |
499 | } | |
500 | else { | |
501 | if(!caps && *nlp == '\n')*nlp = ' '; | |
502 | if(*nlp == ' ' && oct==0); | |
503 | else if(!caps) {putchar(*nlp); oct++;} | |
2958d0fe | 504 | } |
2958d0fe | 505 | if(nlp == endp){ |
280efb10 RH |
506 | if(caps) |
507 | inside= 0; | |
508 | else { | |
509 | if(*(nlp) != ' '){printf("]*"); | |
510 | oct+=2; | |
511 | } | |
512 | else {printf("]* "); | |
513 | oct+=3; | |
514 | } | |
515 | if(oct >60){putchar('\n'); | |
516 | oct=0; | |
517 | } | |
518 | } | |
519 | if(mflg)putc(']',mine); | |
520 | beg = 0; | |
521 | } | |
522 | if(mflg){ | |
523 | if(nlp == myst)beg = 1; | |
524 | if(beg || last){ | |
525 | putc(*nlp,mine); | |
526 | if(myct++ >= 72 || last == 20){ | |
527 | putc('\n',mine); | |
528 | if(last == 20)last=myct=0; | |
529 | else myct=0; | |
530 | } | |
531 | if(last)last++; | |
532 | } | |
2958d0fe RH |
533 | } |
534 | nlp++; | |
535 | } | |
536 | } |