file reorg, pathnames.h, paths.h
[unix-history] / usr / src / usr.bin / diction / diction / dprog.c
CommitLineData
2958d0fe 1#ifndef lint
435e8dff 2static char sccsid[] = "@(#)dprog.c 4.3 (Berkeley) 89/05/11";
2958d0fe
RH
3#endif not lint
4
5/*
280efb10 6 * diction -- print all sentences containing one of default phrases
2958d0fe
RH
7 *
8 * status returns:
9 * 0 - ok, and some matches
10 * 1 - ok, but no matches
11 * 2 - some error
12 */
13
14#include <stdio.h>
15#include <ctype.h>
435e8dff 16#include "pathnames.h"
2958d0fe
RH
17
/* Fixed capacities used while building the phrase matcher. */
#define MAXSIZ 6500	/* number of entries in the state array w[] */
#define QSIZE 650	/* number of slots in the queue local to cfail() */

/* Line bookkeeping shared between execute() and outc(). */
int linemsg;		/* nonzero: outc() prints a "beginning line" header, then clears it */
long olcount, lcount;	/* olcount: line number outc() reports; lcount: counter bumped on each '\n' */
2958d0fe
RH
/*
 * One state of the phrase matcher.  cgotofn() fills in inp/out/nst/link
 * while reading the phrase list; cfail() fills in fail (and may copy an
 * out value from the state a failure link points at).
 */
struct words {
	char inp;		/* character class that selects this state's nst */
	char out;		/* nonzero at the end of a phrase: phrase length - 1,
				 * or 0377 for a phrase that was prefixed with '~' */
	struct words *nst;	/* state entered when inp matches */
	struct words *link;	/* next alternative to try when inp does not match */
	struct words *fail;	/* state to resume from when no alternative matches */
};

struct words w[MAXSIZ];		/* all states; w[0] is the start state */
struct words *smax;		/* last entry of w[] handed out so far */
struct words *q;		/* scratch pointer used by cfail() */
30
280efb10
RH
/*
 * Character-class table, indexed by 7-bit ASCII code.
 *   0    -- control characters other than newline
 *   ' '  -- newline, blank and most punctuation
 *   '.'  -- the sentence enders '!', '.' and '?'
 *   else -- digits map to themselves, letters of either case to lower case
 */
char table[128] = {
	/* 000-007 */	0,   0,   0,   0,   0,   0,   0,   0,
	/* 010-017 */	0,   0,   ' ', 0,   0,   0,   0,   0,
	/* 020-027 */	0,   0,   0,   0,   0,   0,   0,   0,
	/* 030-037 */	0,   0,   0,   0,   0,   0,   0,   0,
	/* 040-047 */	' ', '.', ' ', ' ', ' ', ' ', ' ', ' ',
	/* 050-057 */	' ', ' ', ' ', ' ', ' ', ' ', '.', ' ',
	/* 060-067 */	'0', '1', '2', '3', '4', '5', '6', '7',
	/* 070-077 */	'8', '9', ' ', ' ', ' ', ' ', ' ', '.',
	/* 100-107 */	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	/* 110-117 */	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/* 120-127 */	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	/* 130-137 */	'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ',
	/* 140-147 */	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	/* 150-157 */	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/* 160-167 */	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	/* 170-177 */	'x', 'y', 'z', ' ', ' ', ' ', ' ', ' '
};
/* Option flags set while main() parses the command line. */
int caps = 0;		/* -c given */
int lineno = 0;		/* -l given */
int fflag;		/* -f given */
int nflag = 1;		/* use default file; cleared by -n */
char *filename;		/* argument that followed -f */
int mflg = 0;		/* don't catch output; set when the CATCH file is opened */

/* Counters. */
int nfile;		/* argc left after option parsing */
int nsucc;		/* set to 1 once any phrase matched; drives the exit status */
long nsent = 0;		/* sentences seen */
long nhits = 0;		/* phrases found */

/* Positions inside execute()'s input buffer, shared with outc(). */
char *nlp;		/* next character outc() has yet to emit */
char *begp, *endp;	/* first and last character of the current match */

/* State for the copy written to the `mine' stream. */
int beg, last;
char *myst;
int myct = 0;

int oct = 0;		/* characters printed on the current output line */

/* Streams and the fallback phrase source. */
FILE *wordf;		/* phrase list being read by getargc() */
FILE *mine;		/* catch file, used only when mflg is set */
char *argptr;		/* string getargc() reads from when wordf is null */

/* Running totals over all files when -l is in effect. */
long tl = 0;		/* lines */
long th = 0;		/* phrases found */
2958d0fe
RH
70
71main(argc, argv)
280efb10 72char *argv[];
2958d0fe 73{
280efb10 74 int sv;
2958d0fe
RH
75 while (--argc > 0 && (++argv)[0][0]=='-')
76 switch (argv[0][1]) {
77
78 case 'f':
79 fflag++;
280efb10 80 filename = (++argv)[0];
2958d0fe
RH
81 argc--;
82 continue;
83
84 case 'n':
85 nflag = 0;
86 continue;
87 case 'd':
280efb10
RH
88 mflg=0;
89 continue;
90 case 'c':
91 caps++;
92 continue;
93 case 'l':
94 lineno++;
2958d0fe
RH
95 continue;
96 default:
97 fprintf(stderr, "diction: unknown flag\n");
98 continue;
99 }
100out:
101 if(nflag){
435e8dff 102 wordf = fopen(_PATH_DICT,"r");
2958d0fe
RH
103 if(wordf == NULL){
104 fprintf(stderr,"diction: can't open default dictionary\n");
105 exit(2);
106 }
107 }
108 else {
280efb10 109 wordf = fopen(filename,"r");
2958d0fe
RH
110 if(wordf == NULL){
111 fprintf(stderr,"diction: can't open %s\n",filename);
112 exit(2);
113 }
114 }
115
280efb10
RH
116#ifdef CATCH
117 if(fopen(CATCH,"r") != NULL)
118 if((mine=fopen(CATCH,"a"))!=NULL)mflg=1;
119#endif
120#ifdef MACS
121 if(caps){
122 printf(".so ");
123 printf(MACS);
124 printf("\n");
125 }
126#endif
2958d0fe
RH
127 cgotofn();
128 cfail();
129 nfile = argc;
130 if (argc<=0) {
131 execute((char *)NULL);
132 }
133 else while (--argc >= 0) {
134 execute(*argv);
280efb10
RH
135 if(lineno){
136 printf("file %s: number of lines %ld number of phrases found %ld\n",
137 *argv, lcount-1, nhits);
138 tl += lcount-1;
139 th += nhits;
140 sv = lcount-1;
141 lcount = nhits = 0;
142 }
2958d0fe
RH
143 argv++;
144 }
280efb10
RH
145 if(mflg)fprintf(mine,"number of sentences %ld %ld number of hits %ld %ld\n",nsent,tl,nhits,th);
146 if(!caps&& !lineno)printf("number of sentences %ld number of phrases found %ld\n",nsent,nhits);
147 else if(tl != sv)
148 if(!caps)printf("totals: number of lines %ld number of phrases found %ld\n",tl,th);
2958d0fe
RH
149 exit(nsucc == 0);
150}
151
152execute(file)
153char *file;
154{
155 register char *p;
156 register struct words *c;
157 register ccount;
280efb10
RH
158 int count1;
159 char *beg1;
2958d0fe
RH
160 struct words *savc;
161 char *savp;
162 int savct;
163 int scr;
164 char buf[1024];
165 int f;
166 int hit;
280efb10 167 last = 0;
2958d0fe
RH
168 if (file) {
169 if ((f = open(file, 0)) < 0) {
170 fprintf(stderr, "diction: can't open %s\n", file);
171 exit(2);
172 }
173 }
174 else f = 0;
280efb10
RH
175 lcount = olcount = 1;
176 linemsg = 1;
2958d0fe 177 ccount = 0;
280efb10 178 count1 = -1;
2958d0fe
RH
179 p = buf;
180 nlp = p;
181 c = w;
182 oct = hit = 0;
280efb10
RH
183 savc = (struct words *) 0;
184 savp = (char *) 0;
2958d0fe 185 for (;;) {
280efb10 186 if(--ccount <= 0) {
2958d0fe
RH
187 if (p == &buf[1024]) p = buf;
188 if (p > &buf[512]) {
189 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
190 }
191 else if ((ccount = read(f, p, 512)) <= 0) break;
280efb10
RH
192 if(caps && (count1 > 0))
193 fwrite(beg1,sizeof(*beg1),count1,stdout);
194 count1 = ccount;
195 beg1 = p;
2958d0fe
RH
196 }
197 if(p == &buf[1024])p=buf;
198 nstate:
280efb10 199 if (c->inp == table[*p]) {
2958d0fe
RH
200 c = c->nst;
201 }
202 else if (c->link != 0) {
203 c = c->link;
204 goto nstate;
205 }
206 else {
207 if(savp != 0){
208 c=savc;
209 p=savp;
210 if(ccount > savct)ccount += savct;
211 else ccount = savct;
280efb10
RH
212 savc = (struct words *) 0;
213 savp = (char *) 0;
2958d0fe
RH
214 goto hadone;
215 }
216 c = c->fail;
217 if (c==0) {
218 c = w;
219 istate:
280efb10 220 if (c->inp == table[*p]) {
2958d0fe
RH
221 c = c->nst;
222 }
223 else if (c->link != 0) {
224 c = c->link;
225 goto istate;
226 }
227 }
228 else goto nstate;
229 }
230 if(c->out){
280efb10 231 if((c->inp == table[*(p+1)]) && (c->nst != 0)){
2958d0fe
RH
232 savp=p;
233 savc=c;
234 savct=ccount;
235 goto cont;
236 }
237 else if(c->link != 0){
238 savc=c;
239 while((savc=savc->link)!= 0){
280efb10 240 if(savc->inp == table[*(p+1)]){
2958d0fe
RH
241 savp=p;
242 savc=c;
243 savct=ccount;
244 goto cont;
245 }
246 }
247 }
248 hadone:
280efb10
RH
249 savc = (struct words *) 0;
250 savp = (char *) 0;
2958d0fe
RH
251 if(c->out == (char)(0377)){
252 c=w;
253 goto nstate;
254 }
255 begp = p - (c->out);
256 if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp);
257 endp=p;
280efb10
RH
258 if(mflg){
259 if(begp-20 < &buf[0]){
260 myst = &buf[1024]-20;
261 if(nlp < &buf[512])myst=nlp;
262 }
263 else myst = begp-20;
264 if(myst < nlp)myst = nlp;
265 beg = 0;
266 }
2958d0fe
RH
267 hit = 1;
268 nhits++;
280efb10
RH
269 if(*p == '\n')lcount++;
270 if (table[*p++] == '.') {
271 linemsg = 1;
2958d0fe
RH
272 if (--ccount <= 0) {
273 if (p == &buf[1024]) p = buf;
274 if (p > &buf[512]) {
275 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
276 }
277 else if ((ccount = read(f, p, 512)) <= 0) break;
280efb10
RH
278 if(caps && (count1 > 0))
279 fwrite(beg1,sizeof(*beg1),count1,stdout);
280 count1=ccount;
281 beg1=p;
2958d0fe
RH
282 }
283 }
284 succeed: nsucc = 1;
285 {
286 if (p <= nlp) {
280efb10 287 outc(&buf[1024],file);
2958d0fe
RH
288 nlp = buf;
289 }
280efb10 290 outc(p,file);
2958d0fe 291 }
280efb10 292 if(mflg)last=1;
2958d0fe
RH
293 nomatch:
294 nlp = p;
295 c = w;
296 begp = endp = 0;
297 continue;
298 }
299 cont:
280efb10
RH
300 if(*p == '\n')lcount++;
301 if (table[*p++] == '.'){
2958d0fe
RH
302 if(hit){
303 if(p <= nlp){
280efb10 304 outc(&buf[1024],file);
2958d0fe
RH
305 nlp = buf;
306 }
280efb10
RH
307 outc(p,file);
308 if(!caps)printf("\n\n");
309 if(mflg && last){putc('\n',mine);myct = 0;}
2958d0fe 310 }
280efb10
RH
311 linemsg = 1;
312 if(*p == '\n')olcount = lcount+1;
313 else
314 olcount=lcount;
315 last = 0;
2958d0fe
RH
316 hit = 0;
317 oct = 0;
318 nlp = p;
319 c = w;
320 begp = endp = 0;
280efb10 321 nsent++;
2958d0fe
RH
322 }
323 }
280efb10
RH
324 if(caps && (count1 > 0))
325 fwrite(beg1,sizeof(*beg1),count1,stdout);
2958d0fe
RH
326 close(f);
327}
328
329getargc()
330{
331 register c;
332 if (wordf){
333 if((c=getc(wordf))==EOF){
334 fclose(wordf);
335 if(nflag && fflag){
336 nflag=0;
280efb10 337 wordf=fopen(filename,"r");
2958d0fe 338 if(wordf == NULL){
280efb10 339 fprintf("diction can't open %s\n",filename);
2958d0fe
RH
340 exit(2);
341 }
342 return(getc(wordf));
343 }
344 else return(EOF);
345 }
346 else return(c);
347 }
348 if ((c = *argptr++) == '\0')
349 return(EOF);
350 return(c);
351}
352
353cgotofn() {
354 register c;
355 register struct words *s;
356 register ct;
357 int neg;
358
359 s = smax = w;
360 neg = ct = 0;
361nword: for(;;) {
362 c = getargc();
363 if(c == '~'){
364 neg++;
365 c = getargc();
366 }
367 if (c==EOF)
368 return;
369 if (c == '\n') {
370 if(neg)s->out = 0377;
371 else s->out = ct-1;
372 neg = ct = 0;
373 s = w;
374 } else {
375 loop: if (s->inp == c) {
376 s = s->nst;
377 ct++;
378 continue;
379 }
380 if (s->inp == 0) goto enter;
381 if (s->link == 0) {
382 if (smax >= &w[MAXSIZ - 1]) overflo();
383 s->link = ++smax;
384 s = smax;
385 goto enter;
386 }
387 s = s->link;
388 goto loop;
389 }
390 }
391
392 enter:
393 do {
394 s->inp = c;
395 ct++;
396 if (smax >= &w[MAXSIZ - 1]) overflo();
397 s->nst = ++smax;
398 s = smax;
399 } while ((c = getargc()) != '\n' && c!=EOF);
400 if(neg)smax->out = 0377;
401 else smax->out = ct-1;
402 neg = ct = 0;
403 s = w;
404 if (c != EOF)
405 goto nword;
406}
407
/*
 * overflo -- report that the phrase list does not fit and exit with
 * status 2.  Does not return.
 */
overflo()
{
	fputs("wordlist too large\n", stderr);
	exit(2);
}
412cfail() {
413 struct words *queue[QSIZE];
414 struct words **front, **rear;
415 struct words *state;
416 int bstart;
417 register char c;
418 register struct words *s;
419 s = w;
420 front = rear = queue;
421init: if ((s->inp) != 0) {
422 *rear++ = s->nst;
423 if (rear >= &queue[QSIZE - 1]) overflo();
424 }
425 if ((s = s->link) != 0) {
426 goto init;
427 }
428
429 while (rear!=front) {
430 s = *front;
431 if (front == &queue[QSIZE-1])
432 front = queue;
433 else front++;
434 cloop: if ((c = s->inp) != 0) {
435 bstart=0;
436 *rear = (q = s->nst);
437 if (front < rear)
438 if (rear >= &queue[QSIZE-1])
439 if (front == queue) overflo();
440 else rear = queue;
441 else rear++;
442 else
443 if (++rear == front) overflo();
444 state = s->fail;
445 floop: if (state == 0){ state = w;bstart=1;}
446 if (state->inp == c) {
447 qloop: q->fail = state->nst;
448 if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out;
449 if((q=q->link) != 0)goto qloop;
450 }
451 else if ((state = state->link) != 0)
452 goto floop;
453 else if(bstart==0){state=0; goto floop;}
454 }
455 if ((s = s->link) != 0)
456 goto cloop;
457 }
458/* for(s=w;s<=smax;s++)
459 printf("s %d ch %c out %d nst %d link %d fail %d\n",s,
460 s->inp,s->out,s->nst,s->link,s->fail);
461*/
462}
280efb10 463outc(addr,file)
2958d0fe 464char *addr;
280efb10 465char *file;
2958d0fe 466{
280efb10 467 int inside;
2958d0fe 468
280efb10
RH
469 inside = 0;
470 if(!caps && lineno && linemsg){
471 printf("beginning line %ld",olcount);
472 if(file != (char *)NULL)printf(" %s\n",file);
473 else printf("\n");
474 linemsg = 0;
475 }
2958d0fe 476 while(nlp < addr){
280efb10 477 if(!caps && oct > 60 && table[*nlp] == ' ' && nlp != begp && nlp != endp){
2958d0fe
RH
478 oct=0;
479 putchar('\n');
480 }
481 if(nlp == begp){
280efb10
RH
482 if(caps)inside++;
483 else {
484 if( oct >45){putchar('\n');
485 oct=0;
486 }
487 if( oct==0 || table[*nlp] != ' '){
488 printf("*[");
489 oct+=2;
490 }
491 else {printf(" *[");;
492 oct+=3;
493 }
494 }
495 if(mflg)putc('[',mine);
496 }
497 if(inside){
498 if(islower(*nlp))*nlp = toupper(*nlp);
499 }
500 else {
501 if(!caps && *nlp == '\n')*nlp = ' ';
502 if(*nlp == ' ' && oct==0);
503 else if(!caps) {putchar(*nlp); oct++;}
2958d0fe 504 }
2958d0fe 505 if(nlp == endp){
280efb10
RH
506 if(caps)
507 inside= 0;
508 else {
509 if(*(nlp) != ' '){printf("]*");
510 oct+=2;
511 }
512 else {printf("]* ");
513 oct+=3;
514 }
515 if(oct >60){putchar('\n');
516 oct=0;
517 }
518 }
519 if(mflg)putc(']',mine);
520 beg = 0;
521 }
522 if(mflg){
523 if(nlp == myst)beg = 1;
524 if(beg || last){
525 putc(*nlp,mine);
526 if(myct++ >= 72 || last == 20){
527 putc('\n',mine);
528 if(last == 20)last=myct=0;
529 else myct=0;
530 }
531 if(last)last++;
532 }
2958d0fe
RH
533 }
534 nlp++;
535 }
536}