/*
 * BSD 4 development
 * [unix-history] / usr / src / cmd / diction / diction.c
 * CommitLineData 3b381e99 BJ
 */
/*
 * diction -- print all sentences containing one of default phrases
 *
 *	status returns:
 *		0 - ok, and some matches
 *		1 - ok, but no matches
 *		2 - some error
 */
9
#include <ctype.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
12
/* Capacity of the node pool w[] and of the work queue used by cfail(). */
#define MAXSIZ 6500
#define QSIZE 650

/*
 * One node of the phrase-matching machine.  cgotofn() builds the nodes
 * from the phrase list, cfail() fills in the fail pointers, and
 * execute() walks them over the input text.
 */
struct words {
	char inp;		/* character this node tests; 0 until cgotofn() enters one */
	char out;		/* 0: no phrase ends here; 0377: "~" phrase; else phrase length - 1 */
	struct words *nst;	/* node to move to when inp matches */
	struct words *link;	/* next alternative to try when inp does not match */
	struct words *fail;	/* fallback node, set by cfail() */
};

struct words w[MAXSIZ];		/* node pool; w[0] is the start node */
struct words *smax;		/* most recently allocated node in w[] */
struct words *q;		/* scratch node pointer used by cfail() */
22
23int fflag;
24int nflag = 1; /*use default file*/
25char *filename;
26int nfile;
27int nsucc;
28long nsent = 0;
29long nhits = 0;
30char *nlp;
31char *begp, *endp;
32int oct = 0;
33FILE *wordf;
34char *argptr;
35
36main(argc, argv)
37char **argv;
38{
39 while (--argc > 0 && (++argv)[0][0]=='-')
40 switch (argv[0][1]) {
41
42 case 'f':
43 fflag++;
44 filename = ++argv;
45 argc--;
46 continue;
47
48 case 'n':
49 nflag = 0;
50 continue;
51 case 'd':
52 continue;
53 default:
54 fprintf(stderr, "diction: unknown flag\n");
55 continue;
56 }
57out:
58 if(nflag){
59 wordf = fopen(DICT,"r");
60 if(wordf == NULL){
61 fprintf(stderr,"diction: can't open default dictionary\n");
62 exit(2);
63 }
64 }
65 else {
66 wordf = fopen(*filename,"r");
67 if(wordf == NULL){
68 fprintf(stderr,"diction: can't open %s\n",filename);
69 exit(2);
70 }
71 }
72
73 cgotofn();
74 cfail();
75 nfile = argc;
76 if (argc<=0) {
77 execute((char *)NULL);
78 }
79 else while (--argc >= 0) {
80 execute(*argv);
81 argv++;
82 }
83 printf("number of sentences %ld number of hits %ld\n",nsent,nhits);
84 exit(nsucc == 0);
85}
86
87execute(file)
88char *file;
89{
90 register char *p;
91 register struct words *c;
92 register ccount;
93 struct words *savc;
94 char *savp;
95 int savct;
96 int scr;
97 char buf[1024];
98 int f;
99 int hit;
100 if (file) {
101 if ((f = open(file, 0)) < 0) {
102 fprintf(stderr, "diction: can't open %s\n", file);
103 exit(2);
104 }
105 }
106 else f = 0;
107 ccount = 0;
108 p = buf;
109 nlp = p;
110 c = w;
111 oct = hit = 0;
112 savc = savp = 0;
113 for (;;) {
114 if (--ccount <= 0) {
115 if (p == &buf[1024]) p = buf;
116 if (p > &buf[512]) {
117 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
118 }
119 else if ((ccount = read(f, p, 512)) <= 0) break;
120 convert(p,ccount);
121 }
122 if(p == &buf[1024])p=buf;
123 nstate:
124 if (c->inp == *p) {
125 c = c->nst;
126 }
127 else if (c->link != 0) {
128 c = c->link;
129 goto nstate;
130 }
131 else {
132 if(savp != 0){
133 c=savc;
134 p=savp;
135 if(ccount > savct)ccount += savct;
136 else ccount = savct;
137 savc=savp=0;
138 goto hadone;
139 }
140 c = c->fail;
141 if (c==0) {
142 c = w;
143 istate:
144 if (c->inp == *p) {
145 c = c->nst;
146 }
147 else if (c->link != 0) {
148 c = c->link;
149 goto istate;
150 }
151 }
152 else goto nstate;
153 }
154 if(c->out){
155 if((c->inp == *(p+1)) && (c->nst != 0)){
156 savp=p;
157 savc=c;
158 savct=ccount;
159 goto cont;
160 }
161 else if(c->link != 0){
162 savc=c;
163 while((savc=savc->link)!= 0){
164 if(savc->inp == *(p+1)){
165 savp=p;
166 savc=c;
167 savct=ccount;
168 goto cont;
169 }
170 }
171 }
172 hadone:
173 savc=savp=0;
174 if(c->out == (char)(0377)){
175 c=w;
176 goto nstate;
177 }
178 begp = p - (c->out);
179 if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp);
180 endp=p;
181 hit = 1;
182 nhits++;
183 if (*p++ == '.') {
184 if (--ccount <= 0) {
185 if (p == &buf[1024]) p = buf;
186 if (p > &buf[512]) {
187 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
188 }
189 else if ((ccount = read(f, p, 512)) <= 0) break;
190 convert(p,ccount);
191 }
192 }
193 succeed: nsucc = 1;
194 {
195 if (p <= nlp) {
196 outc(&buf[1024]);
197 nlp = buf;
198 }
199 outc(p);
200 }
201 nomatch:
202 nlp = p;
203 c = w;
204 begp = endp = 0;
205 continue;
206 }
207 cont:
208 if (*p++ == '.'){
209 if(hit){
210 if(p <= nlp){
211 outc(&buf[1024]);
212 nlp = buf;
213 }
214 outc(p);
215 putchar('\n'); putchar('\n');
216 }
217 hit = 0;
218 oct = 0;
219 nlp = p;
220 c = w;
221 begp = endp = 0;
222 }
223 }
224 close(f);
225}
226
227getargc()
228{
229 register c;
230 if (wordf){
231 if((c=getc(wordf))==EOF){
232 fclose(wordf);
233 if(nflag && fflag){
234 nflag=0;
235 wordf=fopen(*filename,"r");
236 if(wordf == NULL){
237 fprintf("can't open %s\n",filename);
238 exit(2);
239 }
240 return(getc(wordf));
241 }
242 else return(EOF);
243 }
244 else return(c);
245 }
246 if ((c = *argptr++) == '\0')
247 return(EOF);
248 return(c);
249}
250
251cgotofn() {
252 register c;
253 register struct words *s;
254 register ct;
255 int neg;
256
257 s = smax = w;
258 neg = ct = 0;
259nword: for(;;) {
260 c = getargc();
261 if(c == '~'){
262 neg++;
263 c = getargc();
264 }
265 if (c==EOF)
266 return;
267 if (c == '\n') {
268 if(neg)s->out = 0377;
269 else s->out = ct-1;
270 neg = ct = 0;
271 s = w;
272 } else {
273 loop: if (s->inp == c) {
274 s = s->nst;
275 ct++;
276 continue;
277 }
278 if (s->inp == 0) goto enter;
279 if (s->link == 0) {
280 if (smax >= &w[MAXSIZ - 1]) overflo();
281 s->link = ++smax;
282 s = smax;
283 goto enter;
284 }
285 s = s->link;
286 goto loop;
287 }
288 }
289
290 enter:
291 do {
292 s->inp = c;
293 ct++;
294 if (smax >= &w[MAXSIZ - 1]) overflo();
295 s->nst = ++smax;
296 s = smax;
297 } while ((c = getargc()) != '\n' && c!=EOF);
298 if(neg)smax->out = 0377;
299 else smax->out = ct-1;
300 neg = ct = 0;
301 s = w;
302 if (c != EOF)
303 goto nword;
304}
305
/*
 * Report that a fixed-size table is full and terminate with status 2.
 * Never returns; the int return type only keeps the definition
 * compatible with callers that have no prototype.
 */
int overflo(void)
{
	fprintf(stderr, "wordlist too large\n");
	exit(2);
}
310cfail() {
311 struct words *queue[QSIZE];
312 struct words **front, **rear;
313 struct words *state;
314 int bstart;
315 register char c;
316 register struct words *s;
317 s = w;
318 front = rear = queue;
319init: if ((s->inp) != 0) {
320 *rear++ = s->nst;
321 if (rear >= &queue[QSIZE - 1]) overflo();
322 }
323 if ((s = s->link) != 0) {
324 goto init;
325 }
326
327 while (rear!=front) {
328 s = *front;
329 if (front == &queue[QSIZE-1])
330 front = queue;
331 else front++;
332 cloop: if ((c = s->inp) != 0) {
333 bstart=0;
334 *rear = (q = s->nst);
335 if (front < rear)
336 if (rear >= &queue[QSIZE-1])
337 if (front == queue) overflo();
338 else rear = queue;
339 else rear++;
340 else
341 if (++rear == front) overflo();
342 state = s->fail;
343 floop: if (state == 0){ state = w;bstart=1;}
344 if (state->inp == c) {
345 qloop: q->fail = state->nst;
346 if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out;
347 if((q=q->link) != 0)goto qloop;
348 }
349 else if ((state = state->link) != 0)
350 goto floop;
351 else if(bstart==0){state=0; goto floop;}
352 }
353 if ((s = s->link) != 0)
354 goto cloop;
355 }
356/* for(s=w;s<=smax;s++)
357 printf("s %d ch %c out %d nst %d link %d fail %d\n",s,
358 s->inp,s->out,s->nst,s->link,s->fail);
359*/
360}
361convert(p,ccount)
362char *p;
363{
364 int ct;
365 char *pt;
366 for(pt=p,ct=ccount;--ct>=0;pt++){
367 if(isupper(*pt))*pt=tolower(*pt);
368 else if(isspace(*pt))*pt=' ';
369 else if(*pt=='.' || *pt=='?'||*pt=='!'){
370 *pt='.';
371 nsent++;
372 }
373 else if(ispunct(*pt))*pt=' ';
374 }
375}
376outc(addr)
377char *addr;
378{
379
380 while(nlp < addr){
381 if(oct++ > 70 && *nlp == ' ' && nlp != begp && nlp != endp){
382 oct=0;
383 putchar('\n');
384 }
385 if(nlp == begp){
386 putchar('[');
387 }
388 putchar(*nlp);
389 if(nlp == endp){
390 putchar(']');
391 }
392 nlp++;
393 }
394}